{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Standard Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdkit import Chem\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#For debugging, reimport modules when executing cells\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with the SMILES based vectorizer\n",
    "\n",
    "The SMILES based vectorizer uses the SMILES format to produce a sequence of one hot encoded characters suited for modelling with sequence oriented neural network architectures such as transformers and RNNs. Data augmentation is done via atom order permutation and generation of non-canonical SMILES.\n",
    "\n",
    "Reference: https://arxiv.org/abs/1703.07076\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import the SmilesVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from molvecgen import SmilesVectorizer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Work with some molecules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "smiles = [ \"CCC(=O)O[C@@]1(CC[NH+](C[C@H]1CC=C)C)c2ccccc2\",\n",
    "            \"CCC[S@@](=O)c1ccc2c(c1)[nH]/c(=N/C(=O)OC)/[nH]2\"]*10\n",
    "          \n",
    "mols =   [Chem.MolFromSmiles(smile) for smile in smiles]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAeFElEQVR4nO3deVQT5/oH8CcJ+y4BA4iIiK1I60bdQAVsFNFQLQW9omlrrxerPdJ77VFO2+tBLdfi7WLsdQFrW9O6xv5UwIUKAhXrSpEKqCCLIhAihABKWEIyvz/GpmEVyTJJeD6nx0NfJvM+0+Wbd2bed4ZGEAQghBAaLDrVBSCEkGHDGEUIIbVgjCKEkFowRhFCSC0YowghpBYTqgtAyDAUFhampqb6+PgsWbKE6lqQfsHRKEK9KCgo8Pb2Dg4OVrbk5+d/8sknAoGAwqqQfsLRKEK9sLGxKSsr6+joULa4ubkBQE1NDXVFIT2Fo1GEeuHm5kaj0WpraxUKhbIFMEZRbzBGEeqFubm5o6OjTCarq6sjW8gYra6uprQupI8wRhHqXbfhp52dnY2NjVQqbWpqorQupHcwRhHqXc+zeDyvR73CGEWod2RoCoXCbi0Yo6gbjFGEeoejUTRAGKMI9c7V1RW6jkbJFoxR1A3GKEK96zn27BmsCAHGKEJ9wZN6NEAYowj1DmMUDRDGKEK9c3V1pdPpIpFILpeTLRijqFcYowj1zsTExNnZWS6Xi0QismXEiBEAUFNTg28wQ6owRhHqU7fhp5WVlb29fXt7e0NDA6V1If2CMYpQn/DyKBoIjFGE+oQxigYCYxShPuF6UDQQGKMI9annsqWewYoQxihCfcKFTGggMEYR6hNeG0UDgTGKUJ8wRtFAYIwi1CcWi2ViYlJXV6d8tx3GKOoJYxShPtHpdBaLRRBEbW0t2UK+6k4oFCpfdYcQxihC/ek2/FS+6q6+vp7SupAewRhFqD94eRQ9F8YoQv3BGEXPhTGKUH96ThTFGEXdmFBdAEJ6jQzNysrK6OhoNpu9dOnSkJAQBweHV155herSkL6g4ZMTEeqHWCz+9ddfP/7445KSEhcXl4qKCgsLC6qLQvoFT+oR6hNBEIcOHYqKiiopKfHx8UlLS8MMRT3hST1Cvaurq1u1atXZs2cBgMvlJiYmWllZUV0U0kcYowj1IiMj4+233xYKhQ4ODklJSUuXLqW6IqS/8KQeoS5kMtmWLVtCQkKEQmFwcHBhYSFmKOof3mIyJGlpaTt27HB0dGQymU5OTkwmk/xZ9U8TEzzDGLx79+5FRUXdunXLxMTk008/3bx5M4PBoLoopO/wfzlDUlpamp2d3f82FhYWw7pyc3NzdXXt1shisTAguvnxxx/XrVvX0tLi6el5+PBhf39/qitChgFHo4aktrb27t27DQ0N9fX1YrG4oaFB+afyB+VL1ftnYmKiHMDOmjXrs88+G8rD2KamprVr1x49ehQAIiMj9+/f7+DgQHVRyGBgjBqb1tZWSVdCobCmpqZbo0gkUj6jyMvLy9TUtKCgwNTUlNriKXHt2rWoqKiKigo7O7s9e/asXLmS6oqQgcEYHaJkMplyGPvee++VlpZ+//33q1atorouners7IyPj4+Pj5fL5VOnTj1y5Ii3tzfVRSHDgzGK4NChQ1wud8yYMffu3Rs6p/aVlZUrVqy4fPkyjUZbv379l19+OTQH40h9GKMI5HK5j4/P/fv3jxw5snz5cqrL0YWff/45OjpaIpG4u7sfOnQoMDCQ6oqQAcN5owgYDMamTZsAID4+3ugf6t7a2vrhhx9GRkZKJJIlS5bk5+djhiI14WgUAQDIZLKxY8c+fPjw1KlTS5YsobocbcnNzY2Kirp//76FhUVCQsKHH35IdUXIGOBoFAEAmJqafvTRRwCwbds2o/xmJQhi165dAQEB9+/f9/X1vXHjBmYo0hQcjaJn2travLy8hELh+fPnFyxYQHU5miQSiVatWnX+/HkajfaPf/xj586d+JARpEEYo+gv//3vf2NjY2fOnHnlyhWqa9EAgiAEAoFQKLxw4cL58+fNzMz8/PyM
49CQXsEYRX8h10HW19dnZ2cbx40XLpfLZDJHjBiRl5cXERExceJEnBmKNA5jFHWxbdu2uLi4efPmXbhwgepaEDIMGKOoi6amJk9Pz8bGxt9++80Ins2Rn59/6tSpqqqq2traqqoqR0fHrKwsqotCxmaoLFlBA2Rvb7927drPP/98x44dycnJVJejro6ODgaD4e/v7+Li4u7uTr6fDiHNwtEo6k4sFnt6era0tOTm5k6ZMoXqchDSdzhvFHXHZDJXr15NEMSOHTuorkUD4uPjw8PD/f39PTw8Jk+eTHU5yAjhaBT1QigUenl5dXR03L5929fXl+py1PLzzz/T6XTypN7FxcXMzIzqipCxwRhFvVu7dm1iYuI777xz8OBBqmvRmLa2NnxDMtI4jFHUu8rKSm9vb4VCce/ePYOea3nnzp1169aJRKKqqqqxY8fm5eVRXREyNhijqE/vvvsun89fs2ZNYmIi1bUMnkQi+eOPP1gslru7u62tLdXlICOEMYr6VFpaOm7cOAaDcf/+fQ8PD6rLUZdMJmtsbHR2dqa6EGRs8E496pO3t/dbb73V0dGxc+dOqmtRy8aNG11cXKytrd966y2qa0FGCEejqD9FRUUTJkywsLCoqKgYPnw41eUMUk1NDZ1OZ7FYNBqN6lqQEcLRKOqPr68vh8ORSqU8Ho/qWgbPzc3NxcWlrq6utLSU6lqQEcLRKHqOGzduTJ8+3c7O7sGDB8OGDaO6nMFoamoaPny4vb39tGnTzpw5Q3U5yNjgaBQ9x7Rp06ZPn97c3PzNN99QXcsg2dvbNzc3P378GDMUaQM+mgQ9x44dO65fvw4AX3/9tbe3t7u7u6OjI5PJZDKZ5ubmVFc3UOQEWKlUik8JQBqHJ/WoT52dnWvXrj1w4ACDwfDx8aHT6bdv31bdwNramslkOjo6Ojk5Mf8UPmrUJDMzcHQEJhOYzGc/9OXgQdi+HUxMYPdumDtXe8eyc+fOpKSkyZMnHz16VHu9oKEJYxT1TiqVLlu27MyZMxYWFj/99FNERMSbb77Z0NAAAA0NDWKxuKGhob29vecHbwQGTv311y5NNFqXSCX/DAmBSZNgyhQoKID6eggOhspKYDB0c3QIaRCe1KPe1Nenbdhw5swZJpOZmpo6bdq0tWvXnj592snJqayszM7Ojtzq6dOnZJ7W19eLxWLyZ0cA8PAAsRjEYmhoePZnfT3U13fpws4Onj4Ff39wdARHR3BygqIimDBBe8d08+bNlpaWoKAg7XWBhiaMUdRDRQWEhoaXle1etIj91Veenp7Lly8/ceKEubn53r17lRkKADY2NjY2NqNGjXrODhWKvyJV+cPMmZCXB8o1RcOHQ22tVmN0+/btzs7OGKNI4zBGUVcFBRAaCtXV8OqrHyQmSqyt582bl5OTM2zYsOTk5NmzZw9mn3Q6ODtDz1WYqk8J0f7FpVOnTmm7CzQ04YQnpCIzE2bNgupqmDsXcnKARrMJC7OtrnZzc8vOzh5khvbDxQXq6p79XFcHzs7www+wezc0N2u4IwAAePTokRG8FgXpIYxRpOKbb6C5GaKi4Px5ePgQpk0z/e23n728bty4MUEbp9sBAXDlCjQ0QEEBPHwIb74J770H69fDJ59ovi+A+vr6nJwcbewZDXF4px6paGmB776D9eshOxvefBOamiAoCE6fBnt7bfW4fz98+ik0NkJn51+NLBZUV+Nde2QocDSKAA4ehJdegvHj4fp1iImB1FQIDYWmJli2DNLStJihAGBrC/X1XTIUAEQiyM7WRm8ZGRna2C0a4vAW05AnEsGnn3aZvPnqqzBsGCxdCjt3Al3LX7Rnz/beLhDA669rvLeWlhaN7xMhPKkf8v7v/+DYMThxAgBg4kT46SeYMAFEImCxdNG7VPpskmk3w4ZBbS3g6+eQIcCT+iGvtrb75E0AHWUoAFhZPUvwbiQSyMzUbFfJyclbt26dPXv2vn37pFKpZneOhjKMUaSCklOT
4GB4991e2o8f11QPra2tH3zwwZIlS7Zu3Xr58uV169Z5eHhs3ry5lvzOQEg9GKNDXrfJm66uFNTA44GbW/fGkyehrU39fd+5c2fGjBl79+41NTXdvHlzcnIym80Wi8Xx8fGjRo16++23CwsL1e8FDWkEGuKEQsLNjRCLieJiws2N6OykpgyBgADo/ldyspp75fP5VlZWADBu3Li8vDxle25uLpfLNTF5dos1ICAgJSVFoVCo2R0amjBGEUH88APh7U2MG0dcvNjZ2Xn16tVOSsJ08eLuMRoVNeid1dXVhYWFkSnJ5XKfPn3ac5vy8vLY2Fj7P2d0TZgwISkpqbW1VY1jQEMRxigiCIIQiUQCgYDL5ZKvCQkNDd24caOui6iqIuztu8SoqSkhlQ5iTxcvXhwxYgQA2NvbHzlypP+Nm5qaeDzeyJEjyTBlsVhxcXFisXhQx4CGIozRoaujoyMrKys2NnbixImqr8z08PAgz3Y3bNig6/PcxETVGO0EyDp27IV2IJPJ4uLi6HQ6AMyYMaO8vHyAH2xvbxcIBFOnTiX/IdjY2ERHR9+7d+/FjwENORijQ45QKOTz+ZGRkQ4ODsrotLKyYrPZCQkJd+7cIQiCfFozAKxZs0Yul+uuOIWCCAwkM1QOsAsgIiJi4J+uqKiYOXMmAJiYmMTFxQ3u0kROTg6HwyG/V+h0OofDuXz58iD2g4YOjNEhQSaT5eTkxMbG+vn5qQ48vby8oqOjU1JS2traun3k3LlzZJKuXr1al0n69PbtQwzGDwDj/sz3J0+eDOSDfD7fxsaGHE3n5OSoWUZJSUlMTIylpSX5D8rPz4/P58tkMjV3i4wSxqgxKysrS0pKioyMVH3WsrW1NZvN5vF4Dx8+7P/jWVlZZDAtX75clwmyfft21ckkA7m4uWLFCnLjiIiIhoYGTVUiEoni4uKYf75LavTo0Twer9e7VWgowxg1NlKpND09nRx4qobR+PHjY2Nj09PT29vbB763S5cu2draAsDSpUs7Ojq0V7YqmUw2adIkZeWLFy/uZ+Pr16+PGTMGAGxtbZOSkrRRT1tbG5/PHzeOHB+DnZ1dTEzMo0ePtNEXMkQYo0ZCOfAkx48kR0fHyMjIpKQkdf6fv3z5MjmY5XA4Pc/9tSQ/P9/U1JQ8CnNzc4lE0nMbuVzO4/HIzV577bWSkhKtliSXy1NSUthsNlmVmZkZl8u9ffu2VjtFBgFj1LC1tLS8//77np6eyuik0+nTpk2Li4sb9PTPpKSkH3/8UbUlNzeXPLFduHChzqZVbty4UXlQBw8e7PbbysrKOXPmAACNRouJiXmhIbaafv/9d+XU/cWLFz9+/FhnXSP9hDFq2P71r3+RAefk5EQOPGtqatTZYX5+Pp1Op9PpBw4cUG3Py8tzcnICgJCQEOmg5nK+KKlU6u3tTcZoaGio6q9Onjzp6OgIACwW69y5czoopqfy8vKAgAAAoGCCLdIzGKMGrLi4GAAcHBxu3rypwZvpX3zxBTnK27Vrl2r7nTt3XF1dAWDOnDnNzc2a6q4fWVlZ5LwCU1PT+vp6giCkUmlMTAyZrfPnzxcKhToooy98Ph8AVqxYQWENSB9gjBqwpKQkAFi2bJnG97x3714ajUaj0b7++mvV9rt377q5uQHArFmzmpqaNN5vT3//+9/J0Ny/f39hYeGrr74KABYWFjwej/Il8Onp6QAQHBxMbRmIchijBuxvf/sbAOzbt08bO09KSiLXAm3btk21vbi42N3dnbyro4MVk42NjeSyzpdeesnc3BwAfHx88vPztd3vQBQVFQHAyy+/THUhiGIYowaMHBhqb8HioUOHGAwGAMTGxqq2P3jwwMvLCwCmTJlCnmtrSVtbW0ZGxhtvvEEOSGk02rp163RzZXYgJBIJANjY2FBdCKIYxqihunPnDgC4urpqtZejR4+St6S7JenDhw/J+z+TJk3S+K3q8vLynqsGAOA///mPZjtSn7W1NQDo5kox
0lsYo4Zqz549ABCl8ii5AwcOvPLKKz/88INmOxIIBOTczI8++ki1XSgU+vr6AsC4ceOqq6vV7KWvVQNeXl4xMTFBQUEAcPjwYTV70TjyuwSfYDLE4ZtBDVV2djYABAcHK1syMzMLCws1/pahyMhIS0vLiIiIr776qqWlZc+ePeQ1UxcXl4sXL86bN6+goGDu3LkZGRnkNdMXUl5enpGRkZGRkZaW9uTJE7LRxsYmKCgoLCxs4cKF5D43bNiQnZ1dU1Oj2UNTn5ubW2lpaU1Nzcsvv0x1LYgyGKMGiSCIS5cuAQA5TCP9+uuv0DVYNYXD4Zw6dSo8PDwxMVEulycmJpJJymKxsrOzQ0JCcnNzZ8+enZmZOXr06OfuTSqVXrlyJSMjIyUl5e7du8r28ePHh4WFsdnswMBA5RImEjnRSigUavrI1EVentbDfEe6hDFqkIqKikQikbu7u3KCenFxcXV19fDhw5VLvzUrNDT0/PnzYWFh3377bUtLC5/PJ6+ZOjo6pqenL1iw4Pr160FBQZmZmeQK957IgWdqaiq5rp9sdHJyCg4OZrPZHA7HrefrmP6kt2mlt4UhXcIYNUhZWVnQdeBJtsydO1f1OXiaFRQUdO7cuUWLFh05ckQulx86dIhMUgcHh19++SU0NPTq1auzZ8/OyMgYP348+ZGWlparV6+mpqaePn26srKSbGQwGH5+fmR0+vv7kwPb/ultWpHDZD0sDOkSxqhB6itGtXFGr2r27Nnnz59fuHDh8ePH5XL5kSNHyLNve3v79PT0sLCwrKysuXPnJiYmFhcXZ2RkXLp0qaOjg/zs8OHDAwMDORwOh8Mhl3IOnN7GqJfX3MDAnQQxnepCEKWovseFXphcLieXt1dUVJAtCoWCxWIBQHFxsQ4KyM3NJXNw0aJFqk8qefr0KZnjyjdukgPPuLi43NxcdRYdkXefLC0tNVG+JmVlEQDErFlU14EohTFqeG7dugUAHh4eyhZyOY2255CqUj7z6ejRo6rtQqGQTqdbWFisXr365MmTGlwwSj72tNcn5lGouJgAILy8qK4DUQpP6g0POdXp9ddfV7YoL4zqrAY/P7/MzMy0tDRyQarSjRs3FArFtGnTvv32W8326ObmVlxcXFNTo/oKKcqRd8X0bwYB0qnnX91H+oYMTdWpTrq5MNrNhAkTNm3a1K1Re5Xo5+VRGxuws4PWVpBIqC4FUQdj1MDI5fKcnBwACAwMJFsIgtDejNEXNdRiFP4ckOpfXUh3MEYNTH5+vkQiGTNmzKhRo8iWgoKC+vr6kSNHko8LoVBDQ0NBQYGFhcX06Zq/c40xivQWxqiB6Tnc67kqlCrZ2dkKhcLf3598M7Nm6fFCJgCM0aENY9TAUDVjdCDIQFe9aKtBOBpFegtj1JB0dnZevnwZVC6MKhQK8lKplsLrhWg10PU2Rl1dAfBm/dCGMWpI8vLympubX3rpJfKB8ADwxx9/iMXi0aNHq74clBJ1dXVFRUVWVlZTp07Vxv71NkZxNIowRg2Jnl8YJQgiICCAfNWHxiljlCAIbex/0NzcwNwcFAqq60DUwen3hqSvC6P6cEav1QujAGBpaTls2DCJRCIWi8m1sHrC3x/a2qguAlEKR6MGo7Oz88qVKzQaTXlhtOccUgrp4E6Xfj5OaQAPqEJGDv8TMBg3btx48uSJj4+Pi4sL2VJeXi6Tyby9vT08PKit7fHjx/fu3bOxsXnttde014veXh5FQxzGqMHoOdwbO3asRCJJS0ujrqhnMjMzCYKYNWtWt6fWa5YBxahYDMeOUV0E0hWMUYPR68VHU1PTvp42r0u6mbuqDzF67BiYmYFYDADAZkN+Phw7BqtXP/vtrFmQmwsA0NoK9+5RViTSMYxRg2FjY2NqatrPmzYopO37SyQ9WchkZfWckWZiInz8MZw5A++/Dy0tuioLUQdj1GD4+fnJZLLw8PDCwkKqa+lCKBSWlJTY2tpOmTJFqx3pw2gUAMLC
4MSJ/jZ4/334/HPgcCAxEaytdVUWog7GqMHYsGHD/PnzRSLRnDlzbt68SXU5f8nMzASAOXPmKB96ryV6EqP29sBiwf37f7UcPw6enuDp+eyMHgDc3WHLFiqKQ1TAGDUYVlZWqampixcvlkgk8+fPv379OtUVPaOzRf16EqMAwOXCTz/99bfLlsGDB/DgAfQ6T0Ek0lldiBoYo4bEzMxMIBCEh4c3NjaGhIRcuXKF6ooAdBujNBqttrZWLpdru6/+LVgAFy7AQJZTXbwIHh6wdCno0/kD0jCMUQNDJunKlSubmprmz59PnlBT6NGjR+Xl5fb29hMnTtR2X2ZmZkwmUy6Xf/755+RL7qhiYgLTp8Pvvz9/y99/B7kcTpyA6dMhJGSg4YsMC8ao4WEwGAcPHnznnXdaWlrCwsLS09MpLIYcigYGBjIYDB10l52dbWZmtnnz5pEjR27atKmqqkoHnZKKiiA8HNrbn/3t229DU9PzP7VpE5SWQmws2NvDhQsQEgLjxsGuXdDaqtVikW5R+0Y9NGhyufy9994DAHNz8+TkZB302NLS0rPx3XffBYCdO3fqoACCIBQKRXp6OofDIf/rZTAYHA7n2rVrWu6U2LOHsLQkAIhPPhnkTiQSYscOwt2dACAACFdXYvt2Qixu12iliBoYowZMoVCsX78eAMzMzE6ePKmlXsrKyng8HpvNtrS0rK+v7/Zb8gF9+fn5Wuq9L7du3eJyucpFUwEBAQKBoLOzU+Md1dURb7zxLPu4XOLJE7X21tFBCATE9OkEAOHg0MBkOnG53Lt372qoWEQNjFHDplAo/vnPfwKAqanpiRMnNLVbsVh8/PjxVatWkTPeSSYmJr/88ovqZuXl5QDg6Ogol8s11fULEQqFcXFxw4YNIyv09vbm8Xi9jpoHJzOTGDGCACDs7YkjRzS1V4IgiIwMYu3aozQaDQDodHp4ePhvv/2myQ6QDmGMGoN///vf5Bkun89XZz+FhYUJCQlsNlt1aTyLxeJyuQKBQCKRdNv+u+++A4Dw8HB1OlVfc3Mzj8dTvuPP2dk5Nja2urpanX3KZERcHEGnEwDEjBlEWZmmiu2ipKQkJibG0tKSrNzPz4/P58tkMq10hrQGY9RIJCQkkEn6/fffv9AH6+rqBAJBdHS06jJTExOTgICAhISE3NxchULR12e5XC4AfPPNN2qXrwFyuTwlJWXmzJnkIZibm3O53KKiokHs6v79ihkzFACEiQmxZQuhhUsFXYhEori4OOVDVEePHp2QkNDzSwvpLYxR40EmKY1G2717d/9bdnZ25ubmJiQkBAQE0FWelzl69Ojo6GiBQNDY2Nj/Hqqqqg4cOGBvbw8ABQUFmjsIDcjJyYmMjCRnDtBoNDabnZKSMvCPCwQCBweHwMAsDw/i0iXtldldW1sbn8/38fEh/11cuXJFd30j9WCMGpUvvviCzA4ej9fXNp999pnyYiIAWFpaLliwgMfjFRcX979zMnzj4uL8/PzIi3omJiaJiYn9DFcpVFpaGhMTY2VlRR7mpEmT+Hx+R0dHPx9pbGyMiooit1++fKVEQsFxyeXy06dPr1+/Xvddo0HDGDU2+/btIzMuPj6+1w127NgBAF5eXuTAs7m5uf8dVlRU7Nu3b/HixTY2NsrwtbW1XbJkSWJi4hM1b11rWV1dXUJCgvINgK6urnFxcQ0NDT23vH79ure3N/m90s+XEEI9YYwaof3795On6lu3bu3528ePH5c9745Ja2trenp6bGysn5+f6ixjLy+vmJiY9PT0trY27dSuFe3t7Xw+39fXV/kdEBMT8+DBA/K3crmcx+ORd9X8/PyeOypHqBuMUeN0+PBh8nlLsbGxA/9UWVlZUlJSZGSkra2tMjqtra05HE5SUlJlZaX2CtaNnJwcDoejnGbE4XBOnz5NvsmKRqPFxMS0t+N8ePTCMEaN1tGjR8kk3bhxYz+bSaVScuA5fvx41YHn+PHjY2Nj09PTjS9Z
8vLyVq5cSQ4/yddBu7q6Xrhwgeq6kKGiEfikBON14sSJFStWyGSydevW7d69mxyFkcrLyzMyMlJTUzMyMtr+fEEwk8mcO3cum81etGiR8nqisaqqqvrf//5naWlZV1e3ZcsWZ2dnqitChgpj1MidPXs2IiKira1tzZo1X3755bVr11JTU5OTkx8+fEhuQKfTJ0+ezGaz2Wx2UFCQth+9jJDxwRg1fsokZTAYyid1sliskJCQBQsWzJ8/n8lkUlshQgYNY3RISE9PP3v2bFJSkq+vL5vN5nA4/v7+qhPvEUKDhjE6hEilUuV0dISQpmCMIoSQWvC0DiGE1IIxihBCasEYRQghtWCMIoSQWv4fJf61Wn5baM8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<rdkit.Chem.rdchem.Mol at 0x2b5a95a296c0>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mols[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create the object and fit the character set and length. The object is called a SMILES vectorizer, but currently only works directly from lists of RDKit molecules. It works by generating the SMILES of the molecule with subsequent one hot encoding into a numpy array. The .fit() function analyses the dataset for which characters are used by the SMILES and updates the character set of the vectorizer as well as adjusting the length of the embedding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Default Charset @C)(=cOn1S2/H[N]\\^$?\n",
      "Default Maximum allowed SMILES length 120\n",
      "\n",
      "After fitting\n",
      "Charset after fit ]\\(S)[2=ONHCc@n/1+^$?\n",
      "Maximum allowed SMILES length 45\n"
     ]
    }
   ],
   "source": [
    "sm_en = SmilesVectorizer(canonical=True, augment=False)\n",
    "\n",
    "print(\"Default Charset %s\"%sm_en.charset)\n",
    "print(\"Default Maximum allowed SMILES length %s\"%sm_en.maxlength)\n",
    "\n",
    "sm_en.fit(mols, extra_chars=[\"\\\\\"])\n",
    "print()\n",
    "print(\"After fitting\")\n",
    "print(\"Charset after fit %s\"%sm_en.charset)\n",
    "print(\"Maximum allowed SMILES length %s\"%sm_en.maxlength)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The molecules can be transformed to vectors. The first one is plotted as a \"piano roll\"."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a97bf42e8>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAECCAYAAAAcvsaeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADdtJREFUeJzt3V+MpXdZB/Dv4+62FbChC7TpPwVNNfQCl2TTkuBFpWILGouJJBA1vSBZLyCBBGMqN6iJCV4I3BCTKg29QJQISGOMtVkx1cSsLLBCSYVWglB30wVWsiixtOXxYg4wbafsmTn/f/P5JJNz3nfeOe8z5zk7893feef3q+4OAMBofmTVBQAALIKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJBWGnKq6taq+kJVPVxVd6yyFn6gqu6qqrNV9cC2fYer6r6qemhye9kqaySpqmur6hNV9WBVfb6q3jrZr1drpqouqap/rap/m/Tq9yf7X1JVJya9+suqumjVtZJU1YGq+kxV/c1kW5821MpCTlUdSPK+JK9Jcn2SN1bV9auqh6f4QJJbn7bvjiTHu/u6JMcn26zWE0ne3t0vTfKKJG+e/BvSq/XzWJJXdffPJjmS5NaqekWSP0rynkmv/jvJm1ZYIz/w1iQPbtvWpw21ypGcG5I83N1f6u7vJPmLJLetsB4muvv+JOeetvu2JHdP7t+d5HVLLYpn6O4z3f3pyf1vZeuH8tXRq7XTW/5nsnlo8tFJXpXkryb79WoNVNU1SX4pyZ9Ntiv6tLFWGXKuTvLVbduPTPaxnq7o7jPJ1i/XJJevuB62qaoXJ3l5khPRq7U0eQvkVJKzSe5L8h9JvtndT0wO8TNwPbw3ye8k+e5k+wXRp421ypBTO+yzxgTsUlU9L8lHkrytu8+vuh521t1PdveRJNdkayT7pTsdttyq2K6qfjnJ2e7+1PbdOxyqTxvi4ArP/UiSa7dtX5Pk9Ipq4cIeraoru/tMVV2Zrf+NsmJVdShbAeeD3f3RyW69WmPd/c2q+sdsXUf1/Ko6OBkl8DNw9V6Z5Feq6rVJLklyabZGdvRpQ61yJOeTSa6bXLV+UZI3JLlnhfXww92T5PbJ/duTfHyFtZDvXyvw/iQPdve7t31Kr9ZMVb2oqp4/uf+jSX4hW9dQfSLJr00O06sV6+7f7e5ruvvF2fqd9A/d/evRp41Vq1yFfJKW35vkQJK7uvsPV1YM31dVH0pyU5IXJnk0yTuT/HWSDyf58SRfSfL67n76xcksUVX9XJJ/SvK5/OD6gXdk67ocvVojVfWybF2weiBb/7n8cHf/QVX9ZLb+6OJwks8k+Y3ufmx1lfI9VXVTkt/u7l/Wp8210pADALAoZjwGAIYk5AAAQxJyAIAhCTkAwJCEHABgSCsPOVV1bNU1MB292gz6tBn0aTPo02ZbechJ4gW0OfRqM+jTZtCnzaBPG2wdQg4AwNwtdTLAi+riviTPfcq+x/NYDuXipdXA3i2rVz/9sm9PfewXP/ucBVay/nZ6rr72jSfzohcceMq+/f48Lcq0r9Wdnn8/+zaDPq2f/8v/5jv92E4Lpz7DUkPOpXW4b6ybl3Y+NtO9p09NfewtVx1ZYCXrb9rnar8/T4vi+YflO9HHc77PTRVyZnq7qqpuraovVNXDVXXHLI8FADBPew45VXUgyfuSvCbJ9UneWFXXz6swAIBZzDKSc0OSh7v7S939nWyt0HrbfMoCAJjNLCHn6iRf3bb9yGTfU1TVsao6WVUnH4+V6QGA5Zgl5Ox00c8zrmLu7ju7+2h3H3WFOgCwLLOEnEeSXLtt+5okp2crBwBgPmYJOZ9Mcl1VvaSqLkryhiT3zKcsAIDZHNzrF3b3E1X1liT3JjmQ5K7u/vzcKgMAmIHJAAGAjbG0yQABANaVkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAw
JCEHABjSnpd1ADbHvadPTX3sLVcdWWAlXMi0vVpUn1Z9fpgnIzkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSNXdSzvZpXW4b6ybl3Y+AGAsJ/p4zve5muZYIzkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSAdXXQCwue49fWqq42656siCK2EUq35Nrfr8zJeRHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSNXdSzvZpXW4b6ybl3a+/WDa2TkTM3ROy3O6OcxOC/vPiT6e832upjnWSA4AMCQhBwAY0kwLdFbVl5N8K8mTSZ7o7qPzKAoAYFbzWIX857v763N4HACAufF2FQAwpFlDTif5+6r6VFUdm0dBAADzMOvbVa/s7tNVdXmS+6rq37v7/u0HTMLPsSS5JM+Z8XQAANOZaSSnu09Pbs8m+ViSG3Y45s7uPtrdRw/l4llOBwAwtT2HnKp6blX92PfuJ/nFJA/MqzAAgFnM8nbVFUk+VlXfe5w/7+6/m0tVAAAzsqwDALAxLOsAAOx7Qg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMKRZ1q4iyb2nT0197C1XHVlgJQDL42cfm8BIDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJDMez8hMnsAqrHrG4d085qprZf8ykgMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGZFkHmIHp6lmVTXo9bVKtjMVIDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSZR1YmmmXQNikKeB3U+uI3z/AOjOSAwAM6YIhp6ruqqqzVfXAtn2Hq+q+qnpocnvZYssEANidaUZyPpDk1qftuyPJ8e6+LsnxyTYAwNq4YMjp7vuTnHva7tuS3D25f3eS1825LgCAmez1mpwruvtMkkxuL59fSQAAs1v4X1dV1bEkx5Lkkjxn0acDAEiy95GcR6vqyiSZ3J59tgO7+87uPtrdRw/l4j2eDgBgd/Yacu5Jcvvk/u1JPj6fcgAA5mOaPyH/UJJ/SfIzVfVIVb0pybuSvLqqHkry6sk2AMDaqO5e2skurcN9Y928tPPNYhGz0077mLthxt3p7ffvf1q7eZ1O+1wt4rW/qPOP2P8Rv/9Vv6ZYnRN9POf7XE1zrBmPAYAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJAs6wAAu7SopTIWtVzFKs17qQzLOgAA+56QAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAM6eCqC2D9LGpa8XlP7Q2LsKjp+hnLonrvNTVfRnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCGZ8ZhnMOMm+5nXP4zDSA4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhnTBkFNVd1XV2ap6YNu+36uq/6qqU5OP1y62TACA3ZlmJOcDSW7dYf97uvvI5ONv51sWAMBsLhhyuvv+JOeWUAsAwNzMck3OW6rqs5O3sy6bW0UAAHOw15DzJ0l+KsmRJGeS/PGzHVhVx6rqZFWdfDyP7fF0AAC7s6eQ092PdveT3f3dJH+a5IYfcuyd3X20u48eysV7rRMAYFf2FHKq6sptm7+a5IFnOxYAYBUOXuiAqvpQkpuSvLCqHknyziQ3VdWRJJ3ky0l+a4E1AgDs2gVDTne/cYfd719ALQAAc3PBkAPr7N7Tp6Y+9parjiywEnh2Xqf7
237v/7Tf/yK+d8s6AABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDqu5e2skurcN9Y928tPPBJtrvs6MC/DAn+njO97ma5lgjOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIB1ddwDKZLp9N4LUHMB9GcgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQ9tWyDqbLB4D1NO3SSzfc8u2pH9NIDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwpH014zEAsJ6mXZXgi/2NqR/TSA4AMKQLhpyquraqPlFVD1bV56vqrZP9h6vqvqp6aHJ72eLLBQCYzjQjOU8keXt3vzTJK5K8uaquT3JHkuPdfV2S45NtAIC1cMGQ091nuvvTk/vfSvJgkquT3Jbk7slhdyd53aKKBADYrV1dk1NVL07y8iQnklzR3WeSrSCU5PJ5FwcAsFdTh5yqel6SjyR5W3ef38XXHauqk1V18vE8tpcaAQB2baqQU1WHshVwPtjdH53sfrSqrpx8/sokZ3f62u6+s7uPdvfRQ7l4HjUDAFzQNH9dVUnen+TB7n73tk/dk+T2yf3bk3x8/uUBAOzNNJMBvjLJbyb5XFWdmux7R5J3JflwVb0pyVeSvH4xJQIA7N4FQ053/3OSepZP3zzfcgAA5qO6e3knq/pakv982u4XJvn60opgFnq1GfRpM+jTZtCn9fMT3f2iaQ5casjZsYCqk919dKVFMBW92gz6tBn0aTPo02azdhUAMCQhBwAY0jqEnDtXXQBT06vNoE+bQZ82gz5tsJVfkwMAsAjrMJIDADB3Qg4AMCQhBwAYkpADAAxJyAEAhvT/DX9UkUe3ToYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 685.714x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "mol_vects = sm_en.transform(mols)\n",
    "plt.matshow(mol_vects[0].T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It is also possible to translate the vector back into SMILES as long as the character set is the one that was used to encode the molecule. The start and end tokens are stripped by default, but can be kept if wanted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['C=CC[C@@H]1C[NH+](C)CC[C@]1(OC(=O)CC)c1ccccc1',\n",
       "       'CCC[S@@](=O)c1ccc2[nH]/c(=N\\\\C(=O)OC)[nH]c2c1'], dtype='<U45')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#It's possible to strip the start and endchar\n",
    "sm_en.reverse_transform(mol_vects[0:2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['^C=CC[C@@H]1C[NH+](C)CC[C@]1(OC(=O)CC)c1ccccc1$',\n",
       "       '^CCC[S@@](=O)c1ccc2[nH]/c(=N\\\\C(=O)OC)[nH]c2c1$'], dtype='<U47')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sm_en.reverse_transform(mol_vects[0:2], strip=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combining the vectorizer with a sequence object for training with tensorflow keras\n",
    "The generators shown below don't work with the tensorflow 2.0 version of keras, where the fit_generator call will soon be deprecated. Instead, keras models support sequences as part of the .fit() call. A basic sequence class has been implemented that takes care of providing the randomly selected mini-batches. The index is reshuffled by keras at the end of each epoch. The Sequence abstract method `__getitem__` can be customized, as in the example SmilesSequence class.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen import SmilesSequence\n",
    "\n",
    "#We make up some labels\n",
    "y = range(len(mols))\n",
    "\n",
    "smi_seq = SmilesSequence(mols, y, sm_en, batch_size=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[14 11  7]\n"
     ]
    }
   ],
   "source": [
    "batch_x, batch_y = smi_seq[0]\n",
    "print(batch_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[14 17 19]\n"
     ]
    }
   ],
   "source": [
    "smi_seq.on_epoch_end()\n",
    "# Now we will get different mini-batches\n",
    "batch_x, batch_y = smi_seq[0]\n",
    "print(batch_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combining with the batch generator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen import SmilesGenerator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A SmilesGenerator takes a dataset and a SMILES vectorizer as input and returns a generator object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "y=[1,2]*10\n",
    "#Set data augmentation on\n",
    "sm_en.augment=True\n",
    "sm_en.canonical=False\n",
    "\n",
    "sm_gn = SmilesGenerator(mols,y,sm_en, batch_size=4, shuffle=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next() method returns the next batch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4, 50, 21)\n",
      "(4,)\n"
     ]
    }
   ],
   "source": [
    "batch_x, batch_y = sm_gn.next()\n",
    "print(batch_x.shape)\n",
    "print(batch_y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 1 1 2]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a97c8d048>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAECCAYAAAAcvsaeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADb1JREFUeJzt3VGMZmdZB/D/Y7ttBWzoCm1qWwVNNfQCS7JpSfCiUpGCxGIiCURNL0jWC0ggwZjKDWpighcCN8SkSkMvECUC0hgjNktNNTErC1QoqdBKEGo3XXElixJLC48Xc4Bh2WW/me+bOd/3zu+XTOY7Z87Meb595sz89/3OvG91dwAARvNDcxcAALAXhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGNGvIqarbqupzVfVoVd05Zy18V1XdXVWnquqhbfsOV9V9VfXI9P6KOWskqarrqur+qnq4qj5bVW+a9uvVmqmqy6rqn6vqX6Ze/d60//lVdXzq1V9U1SVz10pSVRdV1aeq6q+nbX3aULOFnKq6KMm7k7wiyQ1JXldVN8xVD9/jvUluO2vfnUmOdff1SY5N28zr6SRv6e4XJHlxkjdM15BerZ8nk7y0u382yY1JbquqFyf5wyTvnHr130leP2ONfNebkjy8bVufNtScIzk3JXm0u7/Q3d9I8udJbp+xHibd/UCS02ftvj3JPdPje5K8el+L4vt098nu/uT0+GvZ+qF8TfRq7fSW/5k2D01vneSlSf5y2q9Xa6Cqrk3yS0n+dNqu6NPGmjPkXJPky9u2H5v2sZ6u6u6TydYv1yRXzlwP21TV85K8KMnx6NVaml4CeTDJqST3Jfm3JF/t7qenQ/wMXA/vSvLbSb41bf9o9GljzRly6hz7rDEBO1RVz0rywSRv7u4zc9fDuXX3N7v7xiTXZmsk+wXnOmx/q2K7qnpVklPd/Yntu89xqD5tiItnPPdjSa7btn1tksdnqoULe6Kqru7uk1V1dbb+N8rMqupQtgLO+7r7Q9NuvVpj3f3Vqvr7bN1H9eyqungaJfAzcH4vSfLLVfXKJJcluTxbIzv6tKHmHMn5eJLrp7vWL0ny2iT3zlgPP9i9Se6YHt+R5CMz1kK+c6/Ae5I83N3v2PYhvVozVfXcqnr29PiHk/xCtu6huj/Jr06H6dXMuvt3uvva7n5etn4nfay7fy36tLFqzlXIp7T8riQXJbm7u/9gtmL4jqp6f5JbkjwnyRNJ3pbkr5J8IMmPJ/lSktd099k3J7OPqurnkvxDks/ku/cPvDVb9+Xo1Rqpqhdm64bVi7L1n8sPdPfvV9VPZuuPLg4n+VSSX+/uJ+erlG+rqluS/FZ3v0qfNtesIQcAYK+Y8RgAGJKQAwAMScgBAIYk5AAAQxJyAIAhzR5yquro3DWwGL3aDPq0GfRpM+jTZps95CTxDbQ59Goz6NNm0KfNoE8bbB1CDgDAyu3rZICX1KV9WZ75PfueypM5lEv3rYY5/fQLv77QcZ//9DP2uJLdOUi92mSb3qdFr5Nkfa+VRejTZtj0Po3o//K/+UY/ea6FU7/Pvoacy+tw31y37tv51s1HH39woeNe/mM37nElsL4WvU4S18qc9Im5HO9jOdOnFwo5S71cVVW3VdXnqurRqrpzma8FALBKuw45VXVRkncneUWSG5K8rqpuWFVhAADLWGYk56Ykj3b3F7r7G9laofX21ZQFALCcZULONUm+vG37sWnf96iqo1V1oqpOPBUr0wMA+2OZkHOum36+7y7m7r6ru4909xF3qAMA+2WZkPNYkuu2bV+b5PHlygEAWI1lQs7Hk1xfVc+vqkuSvDbJvaspCwBgORfv9hO7++mqemOSjya5KMnd3f3ZlVUGALAEkwGydkwyBsD57NtkgAAA60rIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADGnXyzow
rrlnHN6rWYwXfV5mUWbV5r6m5nbQnz/zMZIDAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhlTdvW8nu7wO9811676dD2BUlkrgoDrex3KmT9cixxrJAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEO6eO4C4KBYdBr+vZiC3xIA49GnvTHndboO5x+NkRwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEjV3ft2ssvrcN9ct+7b+c5m1le4MNfJvMx4Cz/Y8T6WM326FjnWSA4AMCQhBwAY0lILdFbVF5N8Lck3kzzd3UdWURQAwLJWsQr5z3f3V1bwdQAAVsbLVQDAkJYNOZ3k76rqE1V1dBUFAQCswrIvV72kux+vqiuT3FdV/9rdD2w/YAo/R5PksjxjydMBACxmqZGc7n58en8qyYeT3HSOY+7q7iPdfeRQLl3mdAAAC9t1yKmqZ1bVj3z7cZJfTPLQqgoDAFjGMi9XXZXkw1X17a/zZ939tyupCgBgSQdqWQfgYLJUBcxn1UuVWNYBADjwhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEjLrF0FQ06XP+JzOuj0iVXzc2Jxcz5/IzkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCqu/ftZJfX4b65bt2388Fe24tZT82kCmNxTa/W8T6WM326FjnWSA4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkmUd2DeLTm1uWnMAzseyDgDAgSfkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDunjuAjbdoksVJJYrOOjP/yBznQBzMJIDAAzpgiGnqu6uqlNV9dC2fYer6r6qemR6f8XelgkAsDOLjOS8N8ltZ+27M8mx7r4+ybFpGwBgbVww5HT3A0lOn7X79iT3TI/vSfLqFdcFALCU3d6Tc1V3n0yS6f2VqysJAGB5e/7XVVV1NMnRJLksz9jr0wEAJNn9SM4TVXV1kkzvT53vwO6+q7uPdPeRQ7l0l6cDANiZ3Yace5PcMT2+I8lHVlMOAMBqLPIn5O9P8k9JfqaqHquq1yd5e5KXVdUjSV42bQMArI3q7n072eV1uG+uW/ftfIzvoM+ku+jzH/G5M6adXNN7wbWy/o73sZzp07XIsWY8BgCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEOyrMMBYgkARrMX39N7sayAa4pVO8jLX1jWAQA48IQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIlnUAGJylKhiJZR0AgANPyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAzp4rkLWFeLzhBq1k8OMtfJZvDvz0FlJAcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMybIO57Ep06AvOq1+sjnPic3hewpYZ0ZyAIAhXTDkVNXdVXWqqh7atu93q+o/qurB6e2Ve1smAMDOLDKS894kt51j/zu7+8bp7W9WWxYAwHIuGHK6+4Ekp/ehFgCAlVnmnpw3VtWnp5ezrlhZRQAAK7DbkPPHSX4qyY1JTib5o/MdWFVHq+pEVZ14Kk/u8nQAADuzq5DT3U909ze7+1tJ/iTJTT/g2Lu6+0h3HzmUS3dbJwDAjuwq5FTV1ds2fyXJQ+c7FgBgDhecDLCq3p/kliTPqarHkrwtyS1VdWOSTvLFJL+5hzUCAOzYBUNOd7/uHLvfswe1AACsjGUd1pClGgBYZ5vye8qyDgDA
kIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkMx4vIbMYgzAOtuU31NGcgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQNn5Zh48+/uDCx27KNNQAwPKM5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhbfyyDpZqAIDNt+gyTTe9/OsLf00jOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkDZ+xmMAYPMtuoLB5/u/Fv6aRnIAgCFdMORU1XVVdX9VPVxVn62qN037D1fVfVX1yPT+ir0vFwBgMYuM5Dyd5C3d/YIkL07yhqq6IcmdSY519/VJjk3bAABr4YIhp7tPdvcnp8dfS/JwkmuS3J7knumwe5K8eq+KBADYqR3dk1NVz0vyoiTHk1zV3SeTrSCU5MpVFwcAsFsLh5yqelaSDyZ5c3ef2cHnHa2qE1V14qk8uZsaAQB2bKGQU1WHshVw3tfdH5p2P1FVV08fvzrJqXN9bnff1d1HuvvIoVy6ipoBAC5okb+uqiTvSfJwd79j24fuTXLH9PiOJB9ZfXkAALuzyGSAL0nyG0k+U1UPTvvemuTtST5QVa9P8qUkr9mbEgEAdu6CIae7/zFJnefDt662HACA1aju3r+TVf1nkn8/a/dzknxl34pgGXq1GfRpM+jTZtCn9fMT3f3cRQ7c15BzzgKqTnT3kVmLYCF6tRn0aTPo02bQp81m7SoAYEhCDgAwpHUIOXfNXQAL06vNoE+bQZ82gz5tsNnvyQEA2AvrMJIDALByQg4AMCQhBwAYkpADAAxJyAEAhvT/QuVwhyln7hEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 685.714x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(batch_y)\n",
    "plt.matshow(batch_x[0].T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In a for or while loop it will continue yielding new batches."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 1 2 2]\n",
      "[2 1 1 2]\n",
      "[1 2 1 1]\n",
      "[2 1 1 2]\n",
      "[1 1 1 1]\n",
      "[1 2 2 1]\n",
      "[2 1 2 1]\n",
      "[2 1 2 2]\n",
      "[2 1 2 2]\n",
      "[1 2 2 1]\n"
     ]
    }
   ],
   "source": [
    "for i in range(10):\n",
    "    batch_x, batch_y = sm_gn.next()\n",
    "    print(batch_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hetero Generator\n",
    "The heterogenerator is a special SMILES generator that returns SMILES both as input to the encoder and for teacher forcing of the decoder, as well as the output from the decoder.\n",
    "\n",
    "Reference: https://www.mdpi.com/2218-273X/8/4/131\n",
    "\n",
    "Blog-post: https://www.wildcardconsulting.dk/learn-how-to-improve-smiles-based-molecular-autoencoders-with-heteroencoders/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen.generators import HetSmilesGenerator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "#If settings on generator is the same, it can be reused, otherwise recreate\n",
    "import copy\n",
    "vect1 = sm_en\n",
    "vect2 = copy.deepcopy(sm_en)\n",
    "vect2.augment = False # Set the augment to be false for testing purposes\n",
    "vect2.leftpad = False # Set the order of the SMILES to be from left to right\n",
    "\n",
    "print(vect1.augment)\n",
    "print(vect2.augment) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vect2.leftpad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "batchgen = HetSmilesGenerator(mols, None, vect1, vect2, batch_size=3) #Y is None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "_input, _output = batchgen.next()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The first input is one non-canonical form of the SMILES string. The second input and the output are another non-canonical SMILES string of the same molecule, offset from each other by a single character."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a97d46630>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAECCAYAAAAcvsaeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADcxJREFUeJzt3V+MpXdZB/DvY7ttBWzoCm36T0FTDb3AJdm0JHhRqdiCxGIiCURNL0jWC0ggwZjKDWpighcCN8SkSkMvEG0EbGOMtVkx1cSsLLBCSYVWglB30xVXUpRYWni8mBcYypY5M+fMvHN+8/kkmznvO++c95nznDnz3d955/er7g4AwGh+aO4CAAB2g5ADAAxJyAEAhiTkAABDEnIAgCEJOQDAkGYNOVV1a1V9rqoerao75qyF76qqu6rqbFU9tGnf4ap6oKoemT5eNmeNJFV1bVV9rKoerqrPVtVbp/16tc9U1SVV9c9V9S9Tr3532v/iqjox9erPq+qiuWslqaoLqupTVfVX07Y+ranZQk5VXZDkfUleneT6JG+squvnqofv8YEktz5j3x1Jjnf3dUmOT9vM6+kkb+/ulyR5eZI3Tz9DerX/PJnkld39M0mOJLm1ql6e5A+SvGfq1X8nedOMNfJdb03y8KZtfVpTc47k3JDk0e7+Qnd/I8mfJbltxnqYdPeDSc49Y/dtSe6ebt+d5HV7WhTfp7vPdPcnp9tfy8aL8tXRq32nN/zPtHlo+tdJXpnkL6b9erUPVNU1SX4xyZ9M2xV9Wltzhpyrk3x50/Zj0z72pyu6+0yy8cs1yeUz18MmVfWiJC9LciJ6tS9Nb4GcSnI2yQNJ/i3JV7v76ekQr4H7w3uT/FaSb03bPxp9Wltzhpw6zz5rTMA2VdXzknw4ydu6+4m56+H8uvub3X0kyTXZGMl+yfkO29uq2KyqXpvkbHd/YvPu8xyqT2viwhnP/ViSazdtX5Pk9Ey1sLXHq+rK7j5TVVdm43+jzKyqDmUj4Hywuz8y7darfay7v1pVf5+N66ieX1UXTqMEXgPn94okv1RVr0lySZJLszGyo09ras6RnI8nuW66av2iJG9Ict+M9fCD3Zfk9un27UnunbEW8p1rBd6f5OHufvemT+nVPlNVL6yq50+3fzjJz2fjGqqPJfmV6TC9mll3/3Z3X9PdL8rG76S/6+5fjT6trZpzFfIpLb83yQVJ7uru35+tGL6jqj6U5KYkL0jyeJJ3JvnLJPck+bEkX0ry+u5+5sXJ7KGq+tkk/5DkM/nu9QPvyMZ1OXq1j1TVS7NxweoF2fjP5T3d/XtV9RPZ+KOLw0k+leTXuvvJ+Srl26rqpiS/2d2v1af1NWvIAQDYLWY8BgCGJOQAAEMScgCAIQk5AMCQhBwAYEizh5yqOjZ3DSxGr9aDPq0HfVoP+rTeZg85STyB1oderQd9Wg/6tB70aY3th5ADALByezoZ4EV1cV+S537PvqfyZA7l4j2rgZ07KL36qZd+feFjP//p5+xiJTtzUPq07vZrnxZ9/u/H5/5u2Os+efy39n/533yjnzzfwqnfZ09DzqV1uG+sm/fsfLAT958+tfCxt1x1ZBcrgb236PPfc393ePy3dqKP54k+t1DIWertqqq6tao+V1WPVtUdy9wXAMAq7TjkVNUFSd6X5NVJrk/yxqq6flWFAQAsY5mRnBuSPNrdX+jub2RjhdbbVlMWAMBylgk5Vyf58qbtx6Z936OqjlXVyao6+VSsTA8A7I1lQs75Lvr5vquYu/vO7j7a3Uf3418SAABjWibkPJbk2k3b1yQ5vVw5AACrsUzI+XiS66rqxVV1UZI3JLlvNWUBACznwp1+YXc/XVVvSXJ/kguS3NXdn11ZZQAASzAZIACwNvZsMkAAgP1KyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxpx8s6
jO7+06cWOu6Wq47Mep+sD/1nJIs+nxPPaeZjJAcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMqbp7z052aR3uG+vmPTsfADCWE308T/S5WuRYIzkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSBfOXQB75/7TpxY67parjgx5fg6uRZ97ieff3LxOsEpGcgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIVV379nJLq3DfWPdvGfnYz2ZnZbReE7Py+M/lhN9PE/0uVrkWCM5AMCQhBwAYEhLLdBZVV9M8rUk30zydHcfXUVRAADLWsUq5D/X3V9Zwf0AAKyMt6sAgCEtG3I6yd9W1Seq6tgqCgIAWIVl3656RXefrqrLkzxQVf/a3Q9uPmAKP8eS5JI8Z8nTAQAsZqmRnO4+PX08m+SjSW44zzF3dvfR7j56KBcvczoAgIXtOORU1XOr6ke+fTvJLyR5aFWFAQAsY5m3q65I8tGq+vb9/Gl3/81KqgIAWJJlHWCNLTpd/ahT1R/07x8OIss6AAAHnpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxpmbWrGNSiU+Unpsuf20F//A/69w9zWZffE0ZyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhHagZj9dlhsa5bed795gCHDzr8npuJAcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAM6UAt67Au01CvE48prNaiS6X42YOtGckBAIYk5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQzpQyzoA7HeLLtew6PIP27lPdodezcdIDgAwpC1DTlXdVVVnq+qhTfsOV9UDVfXI9PGy3S0TAGB7FhnJ+UCSW5+x744kx7v7uiTHp20AgH1jy5DT3Q8mOfeM3bcluXu6fXeS1624LgCApez0mpwruvtMkkwfL19dSQAAy9v1v66qqmNJjiXJJXnObp8OACDJzkdyHq+qK5Nk+nj22Q7s7ju7+2h3Hz2Ui3d4OgCA7dlpyLkvye3T7duT3LuacgAAVmORPyH/UJJ/SvLTVfVYVb0pybuSvKqqHknyqmkbAGDfqO7es5NdWof7xrp5z863F3ZjJsvt3Od2jDiTpplEWbXd+Pk76M+9uV8nt/P4L3q/69TTuZ/Tq35MT/TxPNHnapFjzXgMAAxJyAEAhiTkAABDEnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhmRZB9hn5p4Cf52s09T6wGpY1gEAOPCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhCTkAwJCEHABgSBfOXQDslUWXNph7qYDdOP/c3xOMZjeWX2H1jOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMy4zEHhllHgVXxerIejOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhbhpyququqzlbVQ5v2/U5V/UdVnZr+vWZ3ywQA2J5FRnI+kOTW8+x/T3cfmf799WrLAgBYzpYhp7sfTHJuD2oBAFiZZa7JeUtVfXp6O+uylVUEALACOw05f5TkJ5McSXImyR8+24FVdayqTlbVyafy5A5PBwCwPTsKOd39eHd/s7u/leSPk9zwA469s7uPdvfRQ7l4p3UCAGzLjkJOVV25afOXkzz0bMcCAMzhwq0OqKoPJbkpyQuq6rEk70xyU1UdSdJJvpjkN3axRgCAbdsy5HT3G8+z+/27UAsAwMpsGXKAZ3f/6VMLH3vL
VUd2sRLgIPHasxjLOgAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQzLjMSzhIM8kyvowO+549GkxRnIAgCEJOQDAkIQcAGBIQg4AMCQhBwAYkpADAAxJyAEAhiTkAABDEnIAgCEJOQDAkNZ+WQfTlQP8YF77OKiM5AAAQxJyAIAhCTkAwJCEHABgSEIOADAkIQcAGJKQAwAMScgBAIYk5AAAQxJyAIAhrf2yDqYrB4D1t+gyTTfc8vWF79NIDgAwJCEHABiSkAMADEnIAQCGJOQAAEMScgCAIQk5AMCQhBwAYEhCDgAwpLWf8RgAWH+LrmDw+f6vhe/TSA4AMKQtQ05VXVtVH6uqh6vqs1X11mn/4ap6oKoemT5etvvlAgAsZpGRnKeTvL27X5Lk5UneXFXXJ7kjyfHuvi7J8WkbAGBf2DLkdPeZ7v7kdPtrSR5OcnWS25LcPR12d5LX7VaRAADbta1rcqrqRUleluREkiu6+0yyEYSSXL7q4gAAdmrhkFNVz0vy4SRv6+4ntvF1x6rqZFWdfCpP7qRGAIBtWyjkVNWhbAScD3b3R6bdj1fVldPnr0xy9nxf2913dvfR7j56KBevomYAgC0t8tdVleT9SR7u7ndv+tR9SW6fbt+e5N7VlwcAsDOLTAb4iiS/nuQzVXVq2veOJO9Kck9VvSnJl5K8fndKBADYvi1DTnf/Y5J6lk/fvNpyAABWo7p7705W9Z9J/v0Zu1+Q5Ct7VgTL0Kv1oE/rQZ/Wgz7tPz/e3S9c5MA9DTnnLaDqZHcfnbUIFqJX60Gf1oM+rQd9Wm/WrgIAhiTkAABD2g8h5865C2BherUe9Gk96NN60Kc1Nvs1OQAAu2E/jOQAAKyckAMADEnIAQCGJOQAAEMScgCAIf0/PUJR254mWFIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 685.714x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAECCAYAAADZzFwPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADbBJREFUeJzt3V+M5WdZB/Dv43bbWrChC22z/aOgKYZe4JJMWhK8qFRsQWJrIgmNml6QrBeQQIIxlRvUxAQvBG6ISZWGXiBKCkhjjGtbaqqJqWxhLSUVWglC3U2XWgg1JKUtjxdzoNPd2Z2zM+ffO/v5JJM5v3femd+z+5458533nHl+1d0BABjBTy27AACAaQkuAMAwBBcAYBiCCwAwDMEFABiG4AIADGOpwaWqbqyqr1XV41V12zJr4UVVdUdVHa+qRzaM7auqe6rqscn7i5ZZI0lVXVlV91fVo1X11ap672TcWq2Yqjq/qv69qv5jslZ/PBl/TVU9OFmrv62qc5ddK0lV7amqL1fV30+OrdMKWVpwqao9ST6W5K1Jrk5yS1Vdvax6eIlPJLnxhLHbktzX3VcluW9yzHI9n+T93f26JG9M8u7J95C1Wj3PJnlzd/9SkgNJbqyqNyb5syQfmazVd5O8a4k18qL3Jnl0w7F1WiHL3HG5Jsnj3f2N7v5hkr9JctMS62Giux9I8vQJwzcluXNy+84kNy+0KE7S3ce6+0uT289k/YH28lirldPr/m9yuHfy1knenOSuybi1WgFVdUWSX0/yV5PjinVaKcsMLpcn+faG4ycmY6ymS7v7WLL+AzPJJUuuhw2q6tVJ3pDkwVirlTR5+uFIkuNJ7knyX0m+193PT6Z4DFwNH03yB0l+NDl+ZazTSllmcKlNxlx/AM5QVb08yWeSvK+7v7/sethcd7/Q3QeSXJH1HefXbTZtsVWxUVW9Pcnx7n5o4/AmU63TEp2zxHM/keTKDcdXJDm6pFrY2pNVtb+7j1XV/qz/1siSVdXerIeWT3b3ZyfD1mqFdff3quqfs/66pFdU1TmT3+Y9Bi7fm5L8RlW9Lcn5SS7M+g6MdVohy9xx+WKSqyav1j43yTuT3L3Eeji9u5PcOrl9a5LPL7EW8pPn3j+e5NHu/vCGD1mrFVNVF1fVKya3fzrJr2b9NUn3J/mtyTRrtWTd/YfdfUV3vzrrP5O+0N2/Heu0UmqZV4eepNqPJtmT5I7u/tOlFcNPVNWnklyX5FVJnkzywSR/l+TTSX42ybeSvKO7T3wBLwtUVb+c5F+SfCUvPh//gay/zsVarZCqen3WX9S5J+u/MH66u/+kqn4+63+YsC/Jl5P8Tnc/u7xK+bGqui7J73f3263TallqcAEAOBM65wIAwxBcAIBhCC4AwDAEFwBgGIILADCMpQeXqjq47BqYjrUag3Uag3Uag3VaPUsPLkncKcZhrcZgncZgncZgnVbMKgQXAICpLLQB3bl1Xp+fl71k7Lk8m705b2E1sH07XavXvv4HU837+sMXbPsc+J5aRZvd97/zvy/k4lfuOWnc/X/2dvLY4/tpcZ7Jd5/q7ou3mrfQ4HJh7etr6/qFnY/Vcujokanm3XDZgTlXAos17X0/cf+fB489Y7i373qou9e2mrejp4qq6saq+lpVPV5Vt+3kawEAbGXbwaWq9iT5WJK3Jrk6yS1VdfWsCgMAONFOdlyuSfJ4d3+ju3+Y9Stn3jSbsgAATraT4HJ5km9vOH5iMvYSVXWwqg5X1eHn4irgAMD27SS41CZjJ73St7tv7+617l7zymwAYCd2ElyeSHLlhuMrkhzdWTkAAKe2k+DyxSRXVdVrqurcJO9McvdsygIAONk52/3E7n6+qt6T5FCSPUnu6O6vzqwyAIATaEAHACzdQhrQAQAskuACAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYggsAMAzBBQAYxrZb
/u92h44emWreDZcdmHMlnI51Yrdxn4bTs+MCAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYggsAMAzBBQAYhuACAAxDcAEAhlHdvbCTXVj7+tq6fmHnAwDGcG/f9VB3r201z44LADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGMY5yy6Akx06emTquTdcdmCOlcDpTXtfdT9dLo8p7CZ2XACAYQguAMAwBBcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYVR3L+xkF9a+vrauX9j5eCldTtlNdINdLv//zNq9fddD3b221Tw7LgDAMAQXAGAYO7rIYlV9M8kzSV5I8vw0WzwAANs1i6tD/0p3PzWDrwMAcFqeKgIAhrHT4NJJ/qmqHqqqg7MoCADgVHb6VNGbuvtoVV2S5J6q+s/ufmDjhEmgOZgk5+eCHZ4OADib7WjHpbuPTt4fT/K5JNdsMuf27l7r7rW9OW8npwMAznLbDi5V9bKq+pkf307ya0kemVVhAAAn2slTRZcm+VxV/fjr/HV3/+NMqgIA2ISW/3CCs72V+dn+7weWQ8t/AGDXEVwAgGEILgDAMAQXAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGHs5FpFRHv03ehsX6ez/d8Py+LnyXTsuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIbgAgAMQ3ABAIYhuAAAw9A5d4fOpHvhtF0R5/E1z/TrArBYHqOnY8cFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwDMEFABiG4AIADEPL/wWaRztnLaJhay6NAbuHHRcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwDC3/gV3vTNr4T3t5AJcGWC6XcTh72XEBAIaxZXCpqjuq6nhVPbJhbF9V3VNVj03eXzTfMgEApttx+USSG08Yuy3Jfd19VZL7JscAAHO1ZXDp7geSPH3C8E1J7pzcvjPJzTOuCwDgJNt9jcul3X0sSSbvL5ldSQAAm5v7XxVV1cEkB5Pk/Fww79MBALvYdndcnqyq/UkyeX/8VBO7+/buXuvutb05b5unAwDYfnC5O8mtk9u3Jvn8bMoBADi1af4c+lNJ/i3JL1bVE1X1riQfSvKWqnosyVsmxwAAc1XdvbCTXVj7+tq6fqq5Z9IVcR7O5k6LOlIya/P4fj7b73vz+j6dR+fg3diNeNn36d34f3pv3/VQd69tNU/nXABgGIILADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADCMlW35D7O07PboIxmpRTiwe2j5DwDsOoILADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADCMc5ZdAGObVyv9WZvXubXHh9kZ5fGE5bLjAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIbgAgAMQ+dcdkT3SmBWPJ4wDTsuAMAwBBcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwDMEFABjGlsGlqu6oquNV9ciGsT+qqv+pqiOTt7fNt0wAgOl2XD6R5MZNxj/S3Qcmb/8w27IAAE62ZXDp7geSPL2AWgAATmsnr3F5T1U9PHkq6aKZVQQAcArbDS5/keQXkhxIcizJn59qYlUdrKrDVXX4uTy7zdMBAGwzuHT3k939Qnf/KMlfJrnmNHNv7+617l7bm/O2WycAwPaCS1Xt33D4m0keOdVcAIBZOWerCVX1qSTXJXlVVT2R5INJrquqA0k6yTeT/N4cawQASDJFcOnuWzYZ/vgcagEAOK0tgwss2qGjR6aee8NlB+ZYCXA28dgzBi3/AYBhCC4AwDAEFwBgGIILADAMwQUA
GIbgAgAMQ3ABAIYhuAAAwxBcAIBh6JzLytGRkhHosrr7WKcx2HEBAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIbgAgAMQ3ABAIYhuAAAw1hoy//Xvv4HOXRoujbZWi8Dq8xjFCyHHRcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwjIW2/P/6wxdokw0AC3Lo6HSX2VkFe/ZPN8+OCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwjIV2zgUAFmesbvWPTzXLjgsAMIwtg0tVXVlV91fVo1X11ap672R8X1XdU1WPTd5fNP9yAYCz2TQ7Ls8neX93vy7JG5O8u6quTnJbkvu6+6ok902OAQDmZsvg0t3HuvtLk9vPJHk0yeVJbkpy52TanUlunleRAADJGb7GpapeneQNSR5Mcml3H0vWw02SS2ZdHADARlMHl6p6eZLPJHlfd3//DD7vYFUdrqrDz+XZ7dQIAJBkyuBSVXuzHlo+2d2fnQw/WVX7Jx/fn+T4Zp/b3bd391p3r+3NebOoGQA4S03zV0WV5ONJHu3uD2/40N1Jbp3cvjXJ52dfHgDAi6ZpQPemJL+b5CtVdWQy9oEkH0ry6ap6V5JvJXnHfEoEAFi3ZXDp7n9NUqf48PWzLQcA4NSquxd3sqrvJPnvE4ZfleSphRXBTlirMVinMVinMVinxfm57r54q0kLDS6bFlB1uLvXlloEU7FWY7BOY7BOY7BOq8e1igCAYQguAMAwViG43L7sApiatRqDdRqDdRqDdVoxS3+NCwDAtFZhxwUAYCqCCwAwDMEFABiG4AIADENwAQCG8f9bslFtp/cjagAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 672x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAECCAYAAADZzFwPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADaFJREFUeJzt3V+MpXdZB/Dv43bbCtjQFdps/yhoqqEXsCSTlgQvKhVbkFhMJIGo6QXJegEJJBhTuUFNTPBC4IaYVNu0F4iSAtIY49qWmmpigC2sUFKxlSDU3XQlQKgxKS08XsyBDruznbMzZ857fjOfTzI5533nN/M+M8+ZM9/5nXd+b3V3AABG8BNTFwAAMC/BBQAYhuACAAxDcAEAhiG4AADDEFwAgGFMGlyq6uaq+kpVPV5Vt01ZC8+pqjur6nRVPbJh36Gquq+qHpvdXjpljSRVdXVVPVhVj1bVl6vqXbP9erViquriqvpsVf3brFd/NNv/8qr6zKxXf1NVF05dK0lVHaiqL1TV38229WmFTBZcqupAkg8neUOSa5O8raqunaoefsxdSW4+Y99tSR7o7muSPDDbZlrPJnlPd78iyWuSvGP2M6RXq+fpJK/r7lclOZLk5qp6TZI/TfLBWa++neTtE9bIc96V5NEN2/q0QqaccbkuyePd/dXu/l6Sv05yy4T1MNPdDyX51hm7b0ly9+z+3UnevNSiOEt3n+ruz8/uP5X1J9oro1crp9f972zz4Oytk7wuyT2z/Xq1AqrqqiS/luQvZ9sVfVopUwaXK5N8Y8P2E7N9rKbLu/tUsv4LM8llE9fDBlX1siSvTvKZ6NVKmr38cCLJ6ST3JfnPJN/p7mdnQzwHroYPJfn9JD+Ybf909GmlTBlcapN9rj8A56mqXpTk40ne3d3fnboeNtfd3+/uI0muyvqM8ys2G7bcqtioqt6U5HR3P7xx9yZD9WlCF0x47CeSXL1h+6okJyeqha09WVWHu/tUVR3O+l+NTKyqDmY9tHykuz8x261XK6y7v1NV/5T185JeXFUXzP6a9xw4vdcm+fWqemOSi5NckvUZGH1aIVPOuHwuyTWzs7UvTPLWJPdOWA/P794kt87u35rkUxPWQn702vsdSR7t7g9seJderZiqemlVvXh2/yeT/ErWz0l6MMlvzobp1cS6+w+6+6ruflnWfyd9urt/K/q0UmrKq0PPUu2HkhxIcmd3/8lkxfAjVfXRJDckeUmSJ5O8L8nfJvlYkp9J8vUkb+nuM0/gZYmq6peS/HOSL+W51+Pfm/XzXPRqhVTVK7N+UueBrP/B+LHu/uOq+rms/2PCoSRfSPLb3f30dJXyQ1V1Q5Lf6+436dNqmTS4AACcDyvnAgDDEFwAgGEILgDAMAQXAGAYggsAMIzJg0tVHZ26BuajV2PQpzHo0xj0afVMHlySeFCMQ6/GoE9j0Kcx6NOKWYXgAgAwl6UuQHdhXdQX54U/tu+ZPJ2DuWhpNbB9m/XqF175f3N//H988QWLLolN+Jlajp0+9vVpZ5b13KNPy/NUvv3N7n7pVuOWGlwuqUN9fd24tOOx+46dPDH32JuuOLKLlcByeexPy/d/77m/73m4u9e2Grejl4qq6uaq+kpVPV5Vt+3kcwEAbGXbwaWqDiT5cJI3JLk2yduq6tpFFQYAcKadzLhcl+Tx7v5qd38v61fOvGUxZQEAnG0nweXKJN/YsP3EbN+PqaqjVXW8qo4/E1cBBwC2byfBpTbZd9aZvt19e3evdfeaM7MBgJ3YSXB5IsnVG7avSnJyZ+UAAJzbToLL55JcU1Uvr6oLk7w1yb2LKQsA4GwXbPcDu/vZqnpnkmNJDiS5s7u/vLDKAADOYAE6AGByS1mADgBgmQQXAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGEILgDAMLa95P+Ijp08MffYm644souV8Hz0
ib3GYxoWx4wLADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGEZ199IOdkkd6uvrxqUdDwAYw/19z8PdvbbVODMuAMAwBBcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBgXTF3AfnLs5Im5xt10xZFdrgTObd7HaeKxOiV9Yr8y4wIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiCCwAwDMEFABiG4AIADKO6e2kHu6QO9fV149KOtx9YPZO9xgrT0/L9Zyr39z0Pd/faVuPMuAAAwxBcAIBh7Ogii1X1tSRPJfl+kmfnmeIBANiuRVwd+pe7+5sL+DwAAM/LS0UAwDB2Glw6yT9W1cNVdXQRBQEAnMtOXyp6bXefrKrLktxXVf/e3Q9tHDALNEeT5OK8YIeHAwD2sx3NuHT3ydnt6SSfTHLdJmNu7+617l47mIt2cjgAYJ/bdnCpqhdW1U/98H6SX03yyKIKAwA4005eKro8ySer6oef56+6+x8WUhUAwCYs+c++sN8vjbDfv35g9VnyHwDYcwQXAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYO7lW0Z427xLplkcfw37v037/+mFKfp8slhkXAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYVs49h3lXMJx3RcSpPycA0/A8vVhmXACAYQguAMAwBBcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwLPm/Q7uxlLPloWFrLo0B+5MZFwBgGIILADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMS/4DQzqfZfxdHmAM+sQ8zLgAAMPYMrhU1Z1VdbqqHtmw71BV3VdVj81uL93dMgEA5ptxuSvJzWfsuy3JA919TZIHZtsAALtqy+DS3Q8l+dYZu29Jcvfs/t1J3rzgugAAzrLdc1wu7+5TSTK7vWxxJQEAbG7X/6uoqo4mOZokF+cFu304AGAP2+6My5NVdThJZrenzzWwu2/v7rXuXjuYi7Z5OACA7QeXe5PcOrt/a5JPLaYcAIBzm+ffoT+a5F+T/GJVPVFVb0/y/iSvr6rHkrx+tg0AsKuqu5d2sLVXXdyfPXb10o53pv2+0uK8q1Lu9+8T8zuflU7ntd8ff7vxc7obK9LuxVVud+PxnOzv7+n5uL/vebi717YaZ+VcAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYggsAMIylLvl/SR3q6+vGpR2PvW/q5dFHsReXBwf2Fkv+AwB7juACAAxDcAEAhiG4AADDEFwAgGEILgDAMAQXAGAYggsAMAzBBQAYhuACAAzjgqkLYPWcz5L3Uy8lvxvHn/prgr1mNy7Nwf5lxgUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIaVczmL1SuBRfKcwiKZcQEAhiG4AADDEFwAgGEILgDAMAQXAGAYggsAMAzBBQAYhuACAAxDcAEAhiG4AADDEFwAgGEILgDAMLYMLlV1Z1WdrqpHNuz7w6r676o6MXt74+6WCQAw34zLXUlu3mT/B7v7yOzt7xdbFgDA2bYMLt39UJJvLaEWAIDntZNzXN5ZVV+cvZR06cIqAgA4h+0Glz9P8vNJjiQ5leTPzjWwqo5W1fGqOv5Mnt7m4QAAthlcuvvJ7v5+d/8gyV8kue55xt7e3WvdvXYwF223TgCA7QWXqjq8YfM3kjxyrrEAAItywVYDquqjSW5I8pKqeiLJ+5LcUFVHknSSryX53V2sEQAgyRzBpbvftsnuO3ahFgCA57VlcIFFOXbyxFzjbrriyC5XAuwnnnv2Fkv+AwDDEFwAgGEILgDAMAQXAGAY
ggsAMAzBBQAYhuACAAxDcAEAhiG4AADDsHIuS2NVSkZgldW9R6/2FjMuAMAwBBcAYBiCCwAwDMEFABiG4AIADENwAQCGIbgAAMMQXACAYQguAMAwBBcAYBiW/AfYwPLwsNrMuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhWPIfAAZz7OSJqUtYuAOH5xtnxgUAGIbgAgAMQ3ABAIYhuAAAwxBcAIBhCC4AwDAEFwBgGIILADAMwQUAGIaVcwFgMDddcWTqEnbB43ONMuMCAAxjy+BSVVdX1YNV9WhVfbmq3jXbf6iq7quqx2a3l+5+uQDAfjbPjMuzSd7T3a9I8pok76iqa5PcluSB7r4myQOzbQCAXbNlcOnuU939+dn9p5I8muTKJLckuXs27O4kb96tIgEAkvM8x6WqXpbk1Uk+k+Ty7j6VrIebJJctujgAgI3mDi5V9aIkH0/y7u7+7nl83NGqOl5Vx5/J09upEQAgyZzBpaoOZj20fKS7PzHb/WRVHZ69/3CS05t9bHff3t1r3b12MBctomYAYJ+a57+KKskdSR7t7g9seNe9SW6d3b81yacWXx4AwHPmWYDutUl+J8mXqurEbN97k7w/yceq6u1Jvp7kLbtTIgDAui2DS3f/S5I6x7tvXGw5AADnVt29vINV/U+S/zpj90uSfHNpRbATejUGfRqDPo1Bn5bnZ7v7pVsNWmpw2bSAquPdvTZpEcxFr8agT2PQpzHo0+pxrSIAYBiCCwAwjFUILrdPXQBz06sx6NMY9GkM+rRiJj/HBQBgXqsw4wIAMBfBBQAYhuACAAxDcAEAhiG4AADD+H9X6EkJQHtdjAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 672x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(_input[0][0].T)\n",
    "plt.matshow(_input[1][0].T)\n",
    "plt.matshow(_output[0].T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2D embeddings with Chemception\n",
    "\n",
    "Chemception uses small chemical \"images\" and is suitable for modelling with image architectures such as convolutional neural networks, Inception modules and similar.\n",
    "\n",
    "Reference: https://arxiv.org/abs/1706.06689\n",
    "\n",
    "Blog-post: https://www.wildcardconsulting.dk/learn-how-to-teach-your-computer-to-see-chemistry-free-chemception-models-with-rdkit-and-keras/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen.vectorizers import ChemceptionVectorizer\n",
    "chemcepterizer = ChemceptionVectorizer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocessing\n",
    "Molecules must have 2D coordinates and Gasteiger charges computed. The preprocess_mols function can do that."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "mols_array = chemcepterizer.preprocess_mols(mols)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Transform all molecules and show the first three channels of the first molecule as an image. The augment property on the object controls whether augmentation (rotation) is active and can be overridden in the function call."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a97d72c50>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAD8CAYAAABXXhlaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADWtJREFUeJzt3V+sHOV9xvHvUxuXNAGBISALQ8GSlZCLYCKLgogqQpvIpShwUaqkreS2qOcmlYjaKoG2apOqlcpNoBdNIgtofNEGCDSAuChYLlZ7URnM35gYx4RSsOziVmAl6QWK4deLHVfHp8fePefM7p7j9/uRjnZnPLvzk2efnXln3n0nVYWktvzMtAuQNHkGX2qQwZcaZPClBhl8qUEGX2qQwZcatKTgJ9mSZH+SV5Pc3ldRksYri+3Ak2QV8APg08BB4Bng81X1/f7KkzQOq5fw2quAV6vqNYAk9wM3AScNfhK7CUpjVlUZtsxSDvUvAt6cNX2wmydpmVvKHn++b5X/t0dPMgPMLGE9knq2lOAfBC6eNb0eODR3oaraBmwDD/Wl5WIph/rPABuTXJZkDfA54LF+ypI0Tove41fVsSS/DzwBrALuq6qXe6tM0tgs+nLeolbmob40duM+qy9phTL4UoMMvtQggy81yOBLDTL4UoMMvtQggy81yOBLDTL4UoMMvtQggy81yOBLDTL4UoMMvtQggy81yOBLDTL4UoMMvtQggy81yOBLDTL4UoMMvtQggy81yOBLDRoa/CT3JTmSZO+seWuT7EhyoHs8d7xlSurTKHv8bwFb5sy7HdhZVRuBnd20pBViaPCr6l+At+fMvgnY3j3fDtzcc12SxmixbfwLq+owQPd4QX8lSRq3Rd8me1RJZoCZca9H0ugWu8d/K8k6gO7xyMkWrKptVbW5qjYvcl2SerbY4D8GbO2ebwUe7accSZOQqjr1Asm3geuA84G3gD8HHgEeBC4B3gBuqaq5JwDne69Tr0zSklVVhi0zNPh9MvjS+I0SfHvuSQ0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0aGvwkFyd5Ksm+JC8nua2bvzbJjiQHusdzx1+upD6Mcu+8dcC6qnouyVnAs8DNwG8Db1fVXye5HTi3qr485L28hZY0Zr3cQquqDlfVc93zHwP7gIuAm4Dt3WLbGXwZSFoBFtTGT3IpcCWwG7iwqg7D4MsBuKDv4iSNx+pRF0zyIeBh4ItV9aNk6NHE8dfNADOLK0/SOIx0m+wkZwCPA09U1de6efuB66rqcHceYFdVfWTI+9jGl8aslzZ+Brv2e4F9x0PfeQzY2j3fCjy6mCIlTd4oZ/U/Cfwr8D3g/W72HzNo5z8IXAK8AdxSVW8PeS/3+NKYjbLHH+lQvy8GXxq/UYI/8sk9qf7y306Yzp9eM6VKtFR22ZUaZPClBtnG18jmbrzRenJo0nq5nCfp9GPwpQYZfKlBBl9qkMGXGmTwpQbZc2+Fm9ubjpP1puuh193ca0Sz120vvpXFPb7UIIMvNcjgSw2yy+4Kt9hutLNft5jXLOR1miy77Eqal8GXGuTlvBVoMYfpp3y/U10S9JLdack9vtQggy81yLP6K1DfPeZOdba+72aFxs+z+pLmZfClBhl8qUG28Ve4cffcs42/8vR177wzkzyd5MUkLyf5ajf/siS7kxxI8kCSNX0ULWn8RjnUfxe4vqquADYBW5JcDdwJ3FVVG4F3gFvHV6akPg3tuVeDtsBPuskzur8Crgd+o5u/HfgK8I3+S9RCnNAL71SX+kZc7oRLe6/87on/9tH7FlidlouRTu4lWZXkBeAIsAP4IXC0qo51ixwELhpPiZL6NlLwq+q9qtoErAeuAi6f
b7H5XptkJsmeJHsWX6akPi3ocl5VHQV2AVcD5yQ53lRYDxw6yWu2VdXmqtq8lEIl9Wfo5bwkHwZ+WlVHk3wAeJLBib2twMNVdX+SbwIvVdXXh7yXl/OWCS/nnb5GuZw3SvA/zuDk3SoGRwgPVtVfJNkA3A+sBZ4Hfquq3h3yXgZ/mTD4p69egt8ng798GPzT1yjBdyCORp3sF3iAg280wL76UoMMvtQg2/iNGnVDLHbo7ZP1DBz1fMLc97DJMToH4pA0L4MvNcjgSw2yjd+IuWPnj9pmPmGDnWL8/ZHb7ot8D43ONr6keRl8qUEe6jeijzvderfclcFDfUnzMvhSgwy+1CCDLzXI4EsNMvhSgxyIoxVzeszVYnrMnaLXnVYW9/hSgwy+1CB77jXqhA1Rj5z4j7l53tfYU29lsOeepHkZfKlBBl9qkG18+au700yvbfzuVtnPJ3m8m74sye4kB5I8kGTNUoqVNDkLOdS/Ddg3a/pO4K6q2gi8A9zaZ2GSxmek4CdZD/wqcE83HeB64KFuke3A/NeAJC07o+7x7wa+BLzfTZ8HHK2qY930QeCinmuTNCZDg5/kRuBIVT07e/Y8i8574i7JTJI9SfYsskZJPRvlRzrXAp9NcgNwJnA2gyOAc5Ks7vb664FD8724qrYB28Cz+tJysaDLeUmuA/6oqm5M8h3g4aq6P8k3gZeq6utDXm/wl6HFjrmv5WncXXa/DPxBklcZtPnvXcJ7SZqgBf0ev6p2Abu6568BV/VfkqRxs+eedJrx13mS5mXwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9q0Eh30knyOvBj4D3gWFVtTrIWeAC4FHgd+PWqemc8ZUrq00L2+J+qqk1Vtbmbvh3YWVUbgZ3dtKQVYCmH+jcB27vn24Gbl16OpEkYNfgFPJnk2SQz3bwLq+owQPd4wTgKlNS/Ue+We21VHUpyAbAjySujrqD7opgZuqCkiVnw3XKTfAX4CfB7wHVVdTjJOmBXVX1kyGu9W640Zr3cLTfJB5Ocdfw58BlgL/AYsLVbbCvw6OJLlTRJQ/f4STYA3+0mVwP/UFV/leQ84EHgEuAN4JaqenvIe7nHl8ZslD3+gg/1l8LgS+PXy6G+pNOPwZcaZPClBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZcaZPClBhl8qUEGX2rQSMFPck6Sh5K8kmRfkmuSrE2yI8mB7vHccRcrqR+j7vH/BvinqvoocAWwD7gd2FlVG4Gd3bSkFWCUm2aeDbwIbKhZCyfZj7fJlpadvu6dtwH4L+Dvkjyf5J7udtkXVtXhbkWHgQuWVK2kiRkl+KuBTwDfqKorgf9hAYf1SWaS7EmyZ5E1SurZKME/CBysqt3d9EMMvgje6g7x6R6PzPfiqtpWVZuranMfBUtauqHBr6r/BN5Mcrz9/kvA94HHgK3dvK3Ao2OpUFLvhp7cA0iyCbgHWAO8BvwOgy+NB4FLgDeAW6rq7SHv48k9acxGObk3UvD7YvCl8evrrL6k04zBlxpk8KUGGXypQQZfapDBlxpk8KUGrZ7w+v4b+A/g/O75NC2HGsA65rKOEy20jp8fZaGJduD5v5Ume6bdd3851GAd1jGtOjzUlxpk8KUGTSv426a03tmWQw1gHXNZx4nGUsdU2viSpstDfalBEw1+ki1J9id5NcnERuVNcl+SI0n2zpo38eHBk1yc5KluiPKXk9w2jVqSnJnk6SQvdnV8tZt/WZLdXR0PJFkzzjpm1bOqG8/x8WnVkeT1JN9L8sLxYeKm9BmZyFD2Ewt+klXA3wK/AnwM+HySj01o9d8CtsyZN43hwY8Bf1hVlwNXA1/o/g8m
Xcu7wPVVdQWwCdiS5GrgTuCuro53gFvHXMdxtzEYsv24adXxqaraNOvy2TQ+I5MZyr6qJvIHXAM8MWv6DuCOCa7/UmDvrOn9wLru+Tpg/6RqmVXDo8Cnp1kL8HPAc8AvMOgosnq+7TXG9a/vPszXA48DmVIdrwPnz5k30e0CnA38O925t3HWMclD/YuAN2dNH+zmTctUhwdPcilwJbB7GrV0h9cvMBgkdQfwQ+BoVR3rFpnU9rkb+BLwfjd93pTqKODJJM8mmenmTXq7TGwo+0kGf77hgJq8pJDkQ8DDwBer6kfTqKGq3quqTQz2uFcBl8+32DhrSHIjcKSqnp09e9J1dK6tqk8waIp+IckvTmCdcy1pKPuFmGTwDwIXz5peDxya4PrnGml48L4lOYNB6P++qv5xmrUAVNVRYBeDcw7nJDn++41JbJ9rgc8meR24n8Hh/t1TqIOqOtQ9HgG+y+DLcNLbZUlD2S/EJIP/DLCxO2O7BvgcgyG6p2Xiw4MnCXAvsK+qvjatWpJ8OMk53fMPAL/M4CTSU8CvTaqOqrqjqtZX1aUMPg//XFW/Oek6knwwyVnHnwOfAfYy4e1SkxzKftwnTeacpLgB+AGD9uSfTHC93wYOAz9l8K16K4O25E7gQPe4dgJ1fJLBYetLwAvd3w2TrgX4OPB8V8de4M+6+RuAp4FXge8APzvBbXQd8Pg06ujW92L39/Lxz+aUPiObgD3dtnkEOHccddhzT2qQPfekBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZca9L8nnhCYdNuL8wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "outputs = chemcepterizer.transform(mols_array)\n",
    "plt.imshow(outputs[0,:,:,:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The SmilesGenerator is reused as the generator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen import ChemceptionGenerator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "chemceptgenerator = ChemceptionGenerator(mols_array, y, chemcepterizer, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a97cdd320>"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAAD8CAYAAABXXhlaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAADW5JREFUeJzt3V2sXNV5xvH/Uzsu+QDxFYiFoYBkJeSimMiiIKKK0CZxEQ1clCpRL9wW9UhVKhGlVQJt1RIpkcpNoBdNIyvQ+KINkJAAcqWC5YLaSpXBfMZgHBNKwbKLU4GVtBdRDG8vZjs9HA6eOXNm9vh4/X/S0cze3jP7lWee2WvtWbN2qgpJbfmFWRcgqX8GX2qQwZcaZPClBhl8qUEGX2qQwZcatKzgJ9mUZG+SF5LcNKmiJE1Xxh3Ak2QV8APg48B+4DHgM1X13OTKkzQNq5fx2EuBF6rqRYAkdwHXAu8Y/CQOE5SmrKoybJvlNPXPAV6Zt7y/WyfpOLecI/5inypvO6InmQPmlrEfSRO2nODvB86dt7wOOLBwo6raAmwBm/rS8WI5Tf3HgPVJLkiyBvg08MBkypI0TWMf8avqSJI/Ah4EVgF3VtWzE6tM0tSM/XXeWDuzqS9N3bTP6ktaoQy+1CCDLzXI4EsNMvhSgwy+1CCDLzXI4EsNMvhSgwy+1CCDLzXI4EsNMvhSgwy+1CCDLzXI4EsNMvhSgwy+1CCDLzXI4EsNMvhSgwy+1CCDLzXI4EsNMvhSg4YGP8mdSQ4l2T1v3elJtifZ192eNt0yJU3SKEf8bwKbFqy7CdhRVeuBHd2ypBViaPCr6l+A1xasvhbY2t3fClw34bokTdG4ffyzq+ogQHd71uRKkjRtY18me1RJ5oC5ae9H0ujGPeK/mmQtQHd76J02rKotVbWxqjaOuS9JEzZu8B8ANnf3NwP3T6YcSX1IVR17g+RbwJXAmcCrwF8C9wH3AOcBLwPXV9XCE4CLPdexdyZp2aoqw7YZGvxJMvjS9I0SfEfuSQ0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0aGvwk5yZ5OMmeJM8mubFbf3qS7Un2dbenTb9cSZMwyrXz1gJrq+qJJCcDjwPXAb8LvFZVf5XkJuC0qvrikOfyElrSlE3kElpVdbCqnuju/wTYA5wDXAts7TbbyuDDQNIKsKQ+fpLzgUuAncDZVXUQBh8OwFmTLk7SdKwedcMk7wPuBT5XVT9OhrYmjj5uDpgbrzxJ0zDSZbKTvAvYBjxYVV/t1u0Frqyqg915gEeq6oNDnsc+vjRlE+njZ3BovwPYczT0nQeAzd39zcD94xQpqX+jnNX/KPCvwPeBN7vVf8qgn38PcB7wMnB9Vb025Lk84ktTNsoRf6Sm/qQYfGn6JtLUl3TiMfhSgwy+1CCDLzXI4EsNMvhSgwy+1KCRx+pLC9WX//3n9/Pnl8+wEi2VR3ypQQZfapBDdjWyhS/eaD/MVt8csitpUQZfapDBlxpk8KUGGXypQQZfapAj93RMjs47MXnElxpk8KUGOXJPIxt35J7dhX45ck/Sogy+1CCDLzXIPr5GNr+vDsD8/vrzv//Wf/vQnUvfwYLn93zAeCZ17byTkjya5Okkzyb5Urf+giQ7k+xLcneSNZMoWtL0jdLU/ylwVVVdDGwANiW5DLgVuK2q1gOvAzdMr0xJk7Skpn6S9wD/Bvwh8I/AB6rqSJLLgVuq6pNDHm9TfwU71td5C7sB4zTTnehjMib2dV6SVUmeAg4B24EfAoer6ki3yX7gnHELldSvkYJfVW9U1QZgHXApcNFimy322CRzSXYl2TV+mZImaUlf51XVYeAR4DLg1CRHf+SzDjjwDo/ZUlUb
q2rjcgqVNDlD+/hJ3g/8rKoOJ3k38BCDE3ubgXur6q4kXweeqaqvDXku+/hjmET/eex9z9/viNsN23aaz6HR+vij/Cx3LbA1ySoGLYR7qmpbkueAu5J8GXgSuGNZ1UrqzdDgV9UzwCWLrH+RQX9f0grjyL0VoO8mcM0bhZcRR+BNpKnvyL2J8Nd5khZl8KUG2dRfgY51pv0d/4PHbEaPvK9j/IBn1G8DPIs/GTb1JS3K4EsNMvhSg+zjr3CjfgV2rP/4kUfkTeA8wbjnAjQ6+/iSFmXwpQbZ1Nex59I7Bpvmxyeb+pIWZfClBhl8qUH28Rs1iaGyDrc9PtnHl7Qogy81aJSpt6RFTXpeffXHI77UIIMvNciz+o2a3zQft1nuWf3jk2f1JS3K4EsNMvhSg+zjN2rciTne8hwTOE+gyZtoH7+7VPaTSbZ1yxck2ZlkX5K7k6xZTrGS+rOUpv6NwJ55y7cCt1XVeuB14IZJFiZpekZq6idZB2wFvgJ8HvhN4EfAB6rqSJLLgVuq6pNDnsem/gpQdd//L+S6d9zOr/COT5Ns6t8OfAF4s1s+AzhcVUe65f3AOUuuUNJMDA1+kmuAQ1X1+PzVi2y66NE8yVySXUl2jVmjpAkb5Uc6VwCfSnI1cBJwCoMWwKlJVndH/XXAgcUeXFVbgC1gU186Xizp67wkVwJ/UlXXJPk2cG9V3ZXk68AzVfW1IY83+CuAQ3FXtmkP2f0i8PkkLzDo89+xjOeS1CMH8OhtPOKvbKMc8Z2IQ287K2vYT3yO1ZcaZPClBtnU1zGvYKsTk0d8qUEGX2qQwZca5Pf40gnGyTYlLcrgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNcjgSw0y+FKDDL7UIIMvNWikWXaTvAT8BHgDOFJVG5OcDtwNnA+8BPx2Vb0+nTIlTdJSjvgfq6oNVbWxW74J2FFV64Ed3bKkFWA5Tf1rga3d/a3AdcsvR1IfRg1+AQ8leTzJXLfu7Ko6CNDdnjWNAiVN3qhX0rmiqg4kOQvYnuT5UXfQfVDMDd1QUm+WPL12kluA/wH+ALiyqg4mWQs8UlUfHPJYp9eWpmwi02sneW+Sk4/eBz4B7AYeADZ3m20G7h+/VEl9GnrET3Ih8L1ucTXwD1X1lSRnAPcA5wEvA9dX1WtDnssjvjRloxzxvZKOdILxSjqSFmXwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGmTwpQYZfKlBBl9qkMGXGjRS8JOcmuQ7SZ5PsifJ5UlOT7I9yb7u9rRpFytpMkY94v818E9V9SHgYmAPcBOwo6rWAzu6ZUkrwCgXzTwFeBq4sOZtnGQvXiZbOu5M6tp5FwI/Av4uyZNJvtFdLvvsqjrY7eggcNayqpXUm1GCvxr4CPC3VXUJ8L8soVmfZC7JriS7xqxR0oSNEvz9wP6q2tktf4fBB8GrXROf7vbQYg+uqi1VtbGqNk6iYEnLNzT4VfVfwCtJjvbffw14DngA2Nyt2wzcP5UKJU3c0JN7AEk2AN8A1gAvAr/H4EPjHuA84GXg+qp6bcjzeHJPmrJRTu6NFPxJMfjS9E3qrL6kE4zBlxpk8KUGGXypQQZfapDBlxpk8KUGre55f/8N/CdwZnd/lo6HGsA6FrKOt1pqHb80yka9DuD5+U6TXbMeu3881GAd1jGrOmzqSw0y+FKDZhX8LTPa73zHQw1gHQtZx1tNpY6Z9PElzZZNfalBvQY/yaYke5O8kKS3WXmT3JnkUJLd89b1Pj14knOTPNxNUf5skhtnUUuSk5I8muTpro4vdesvSLKzq+PuJGumWce8elZ18zlum1UdSV5K8v0kTx2dJm5G75FeprLvLfhJVgF/A/wG8GHgM0k+3NPuvwlsWrBuFtODHwH+uKouAi4D
Ptv9H/Rdy0+Bq6rqYmADsCnJZcCtwG1dHa8DN0y5jqNuZDBl+1GzquNjVbVh3tdns3iP9DOVfVX18gdcDjw4b/lm4OYe938+sHve8l5gbXd/LbC3r1rm1XA/8PFZ1gK8B3gC+BUGA0VWL/Z6TXH/67o381XANiAzquMl4MwF63p9XYBTgP+gO/c2zTr6bOqfA7wyb3l/t25WZjo9eJLzgUuAnbOopWteP8VgktTtwA+Bw1V1pNukr9fnduALwJvd8hkzqqOAh5I8nmSuW9f369LbVPZ9Bn+x6YCa/EohyfuAe4HPVdWPZ1FDVb1RVRsYHHEvBS5abLNp1pDkGuBQVT0+f3XfdXSuqKqPMOiKfjbJr/awz4WWNZX9UvQZ/P3AufOW1wEHetz/QiNNDz5pSd7FIPR/X1XfnWUtAFV1GHiEwTmHU5Mc/f1GH6/PFcCnkrwE3MWguX/7DOqgqg50t4eA7zH4MOz7dVnWVPZL0WfwHwPWd2ds1wCfZjBF96z0Pj14kgB3AHuq6quzqiXJ+5Oc2t1/N/DrDE4iPQz8Vl91VNXNVbWuqs5n8H7456r6nb7rSPLeJCcfvQ98AthNz69L9TmV/bRPmiw4SXE18AMG/ck/63G/3wIOAj9j8Kl6A4O+5A5gX3d7eg91fJRBs/UZ4Knu7+q+awF+GXiyq2M38Bfd+guBR4EXgG8Dv9jja3QlsG0WdXT7e7r7e/boe3NG75ENwK7utbkPOG0adThyT2qQI/ekBhl8qUEGX2qQwZcaZPClBhl8qUEGX2qQwZca9H+3syltvu9O5AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "X_batch, y_batch = chemceptgenerator.next()\n",
    "plt.imshow(X_batch[0,:,:,:3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Morgan Fingerprints as an example of another vectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen.vectorizers import MorganDictVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "mdv = MorganDictVectorizer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[  98513984  219692797  535847852  864674487  864942730  951226070\n",
      " 1074692693 1113276223 1275884092 1412710081 1465074879 1471352294\n",
      " 1510328189 1740632203 1775209781 1963848833 2064788354 2119439498\n",
      " 2143075994 2154975788 2245384272 2246699815 2246703798 2246728737\n",
      " 2246997334 2281069397 2534373880 2763854213 2959890341 2968968094\n",
      " 2976033787 2976816164 3075056557 3116051204 3217380708 3218693969\n",
      " 3542456614 3586270004 3600182528 3643586416 3696402029 3999906991\n",
      " 4172736314 4194366826 4208894168 4212392629]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "46"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit analyses the dataset and sets the keys mapping\n",
    "mdv.fit(mols[0:1])\n",
    "print(mdv.keys)\n",
    "mdv.dims"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([3., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
       "        1., 1., 1., 2., 1., 1., 2., 1., 1., 1., 1., 1., 3., 1., 1., 1., 2.,\n",
       "        1., 5., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1.]), 0)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mdv.transform_mol(mols[0],misses=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([0., 0., 0., 1., 2., 2., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,\n",
       "        0., 0., 0., 2., 1., 0., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "        4., 3., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 30)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mdv.transform_mol(mols[1], misses=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "arr, misses = mdv.transform(mols, misses=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a98310898>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAicAAAECCAYAAAAl2XfFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAEe5JREFUeJzt3WGMXWlZB/DnYWZ30HUJrLs0ZMoIYxrCftA17WxI8cPailmQCCaaQG1CkzZrUkkw0Rj0C2piox8UPpSYrO2GTaooUZGNIeqmQ4MfDO1URl2ChJViqbvZlQBp3Zbuln390EvaLdP2PTNz7n3PzO+XNDP39sk5z97nnLv/nrnzniylBABAK1416QYAAK4nnAAATRFOAICmCCcAQFOEEwCgKcIJANCUiYaTzHw4M7+SmU9n5ocn2Qu3l5mPZebzmfnUdc/dk5lPZuZXR19fN8keWVlmvjEzP5eZX87ML2Xmh0bPm98AZOarM/NkZv7baH6/N3r+zZn5hdH8/ioz75x0r6wsM6cy84uZ+fejx2Z3CxMLJ5k5FREfj4h3RsT9EfH+zLx/Uv1Q5RMR8fANz304Io6XUrZFxPHRY9pzJSJ+o5Ty1oh4W0T82uh8M79huBwRu0opPxkRD0TEw5n5toj4o4j46Gh+346I/RPskVv7UER8+brHZncLk7xy8mBEPF1K+Vop5cWI+MuIeM8E++E2Simfj4hv3fD0eyLi8dH3j0fEe8faFFVKKc+WUv519P2FuPomORvmNwjlqv8bPbxj9KdExK6I+OvR8+bXqMzcGhE/HxFHRo8zzO6WJhlOZiPiG9c9Pjd6jmHZUkp5NuLq/wAj4vUT7ofbyMw3RcRPRcQXwvwGY/RjgeWIeD4inoyI/4qI75RSroxKvIe262MR8VsR8fLo8Y+G2d3SJMNJrvCctfShR5n5IxHxNxHx66WU85Puh3qllO+VUh6IiK1x9crzW1cqG29X3E5mvjsini+lnL7+6RVKze460xPc97mIeON1j7dGxDMT6oXVey4z31BKeTYz3xBX/1VHgzLzjrgaTP68lPK3o6fNb2BKKd/JzBNx9bNDr83M6dG/wL2HtuntEfELmfmuiHh1RLwmrl5JMbtbmOSVk1MRsW30ieU7I+J9EfHEBPthdZ6IiA+Mvv9ARHxmgr1wE6OfcR+NiC+XUv7kur8yvwHIzPsy87Wj738oIn42rn5u6HMR8UujMvNrUCnlt0spW0spb4qr/59bLKX8SpjdLeUk70o8SpIfi4ipiHislPIHE2uG28rMT0bEQxFxb0Q8FxEfiYi/i4hPRcRcRJyNiF8updz4oVkmLDN/OiL+OSL+I6793Pt34urnTsyvcZn5E3H1Q5NTcfUflZ8qpfx+Zs7H1V8muCcivhgRe0splyfXKbeSmQ9FxG+WUt5tdrc20XACAHAjK8QCAE0RTgCApggnAEBThBMAoCnCCQDQlCbCSWY+MukeWB2zGzbzGy6zGzbzu7UmwklEGNJwmd2wmd9wmd2wmd8ttBJOAAAiYsyLsE3dfVeZvu+1P/D8yxdeiFfdfdcrnrvzmyvdF6ltV+6q73n6hY2x+N1LL70Qd9xx1+0LmaibHZtXLr4Q0z/8g/Pr6/jcjOfIzbx4b/1/30rvh5M49/qa31ve/M3q2q+cube6tmU3m99GPke++91vx0svvlD1HzjWG/9N3/fa2HroYFXt3JGpnrtZf88tzFTXbjlllWLGp8uxGdHf8ekcuebsge9V17byftjX/I4fO1pdu3vv/uraIdrI58jSycPVtWv6sU5mPpyZX8nMpzPzw2vZFgBAxBrCSWZORcTHI+KdEXF/RLw/M+9fr8YAgM1pLVdOHoyIp0spXyulvBhX7674nvVpCwDYrNYSTmYj4hvXPT43eg4AYNXWEk5W+sTtD3x0ODMfycylzFx6+cILa9gdALAZrCWcnIuIN173
eGtEPHNjUSnl0VLKjlLKjht/XRgA4EZrCSenImJbZr45M++MiPdFxBPr0xYAsFmtep2TUsqVzPxgRPxjRExFxGOllC+tW2cAwKa0pkXYSimfjYjP1tbPnLkU83uW17LLdXFl1/bq2q6LV9WaXjzdy3aHqMs8vG6rtLCzt013mkmHPjb6rOcXJ91Bd7Mdel5Yrl9kbtuJfdW1c/UtDPIY6vIaD02Wi9W17q0DADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8uS91vOXW5x07glbrehqGv49M5cs3ZA/XLu7fyftjX/I4fO1pdu3vv/uraIdrI58jSycNx4fy5rKl15QQAaIpwAgA0RTgBAJoinAAATRFOAICmCCcAQFOEEwCgKcIJANAU4QQAaIpwAgA0RTgBAJoyPc6dzZy5FPN7lse5yxVd2bW9urbrPUlqTS+e7mW7Q9RlHl63VVrY2dumO82kQx8bfdbzi5PuoLvZDj0vLNffO2jbiX3VtXP1LQzyGOryGg9NlovVta6cAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKZkKWVsO5uZny1bDx2sqp07MtVzN+uvy1L3W05d7rETeKWut2Ho6/h0jlxz9kD98u6tvB/2Nb/jx45W1+7eu7+6dog28jmydPJwXDh/LmtqXTkBAJoinAAATRFOAICmCCcAQFOEEwCgKcIJANAU4QQAaIpwAgA0RTgBAJoinAAATZke585mzlyK+T3L49zliq7s2l5d23XZ71rTi6d72e4QdZmH122VFnb2tulOM+nQx0af9fzipDvobrZDzwvL9cvzbzuxr7p2rr6FQR5DXV7joclysbrWlRMAoCnCCQDQlDX9WCczvx4RFyLiexFxpZSyYz2aAgA2r/X4zMnPlFK+uQ7bAQDwYx0AoC1rDSclIv4pM09n5iMrFWTmI5m5lJlLL8XlNe4OANjo1vpjnbeXUp7JzNdHxJOZ+Z+llM9fX1BKeTQiHo2IeE3eU9a4PwBgg1vTlZNSyjOjr89HxKcj4sH1aAoA2LxWHU4y867MvPv730fEz0XEU+vVGACwOa3lxzpbIuLTmfn97fxFKeUf1qUrAGDTylLG9zGQmfnZsvXQwarauSNTPXez/rosdb/llA8HMz5db8PQ1/HpHLnm7IH65d1beT/sa37Hjx2trt29d3917RBt5HNk6eThuHD+XNbU+lViAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0JS13Funs5kzl2J+z/I4d7miK7u2V9d2Xfa71vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL98vzbTuyrrp2rb2GQx1CX13hoslysrnXlBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8u9+HZcupyj53AK3W9R1Rfx6dz5JqzB+rvPdPK+2Ff8zt+7Gh17e69+6trh2gjnyNLJw/HhfPnsqbWlRMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQlOlx7mzmzKWY37M8zl2u6Mqu7dW1XZf9rjW9eLqX7Q5Rl3l43VZpYWdvm+40kw59bPRZzy9OuoPuZjv0vLBcvzz/thP7qmvn6lsY5DHU5TUemiwXq2tdOQEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNyVLK2HY2Mz9bth46WFU7d2Sq527WX5el7recutxjJ/BKXW/D0Nfx6Ry55uyB+uXdW3k/7Gt+x48dra7dvXd/de0QbeRzZOnk4bhw/lzW
1LpyAgA05bbhJDMfy8znM/Op6567JzOfzMyvjr6+rt82AYDNoubKySci4uEbnvtwRBwvpWyLiOOjxwAAa3bbcFJK+XxEfOuGp98TEY+Pvn88It67zn0BAJvUaj9zsqWU8mxExOjr69evJQBgM+v9A7GZ+UhmLmXm0ssXXuh7dwDAwK02nDyXmW+IiBh9ff5mhaWUR0spO0opO151912r3B0AsFmsNpw8EREfGH3/gYj4zPq0AwBsdjW/SvzJiPiXiHhLZp7LzP0R8YcR8Y7M/GpEvGP0GABgzaZvV1BKef9N/mr3OvcCAHD7cLKeZs5civk9y+Pc5Yqu7NpeXdt12e9a04une9nuEHWZh9dtlRZ29rbpTjPp0MdGn/X84qQ76G62Q88Ly/XL8287sa+6dq6+hUEeQ11e46HJcrG61vL1AEBThBMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoClZShnbzmbmZ8vWQweraueOTPXczfrrstT9llOXe+wEXqnrbRj6Oj6dI9ecPVC/vHsr74d9ze/4saPVtbv37q+uHaKNfI4snTwcF86fy5paV04AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKAp0+Pc2cyZSzG/Z3mcu1zRlV3bq2u73pOk1vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL9vYO2ndhXXTtX38Igj6Eur/HQZLlYXevKCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQFOEEAGhKllLGtrOZ+dmy9dDBqtq5I1M9d7P+uix1v+XU5R47gVfqehuGvo5P58g1Zw/UL+/eyvthX/M7fuxode3uvfura4doI58jSycPx4Xz57Km1pUTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0JTpce5s5sylmN+zPM5drujKru3VtV2X/a41vXi6l+0OUZd5eN1WaWFnb5vuNJMOfWz0Wc8vTrqD7mY79LywXL88/7YT+6pr5+pbGOQx1OU1HposF6trXTkBAJpy23CSmY9l5vOZ+dR1z/1uZv5PZi6P/ryr3zYBgM2i5srJJyLi4RWe/2gp5YHRn8+ub1sAwGZ123BSSvl8RHxrDL0AAKzpMycfzMx/H/3Y53Xr1hEAsKmtNpz8aUT8eEQ8EBHPRsQf36wwMx/JzKXMXHopLq9ydwDAZrGqcFJKea6U8r1SyssR8WcR8eAtah8tpewopey4I/r5tVwAYONYVTjJzDdc9/AXI+Kpm9UCAHRx20XYMvOTEfFQRNybmeci4iMR8VBmPhARJSK+HhG/2mOPAMAmcttwUkp5/wpPH+2hFwCAyFLK2HY2Mz9bth46WFU7d2Sq527WX5el7rec8uFgxqfrbRj6Oj6dI9ecPVC/vHsr74d9ze/4sfp/7+7eu7+6dog28jmydPJwXDh/LmtqLV8PADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmnLbe+usp5kzl2J+z/I4d7miK7u2V9d2Xfa71vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL98vzbTuyrrp2rb2GQx1CX13hoslysrnXlBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8u9+HZcupyj53AK3W9R1Rfx6dz5JqzB+rvPdPK+2Ff8zt+7Gh17e69+6trh2gjnyNLJw/HhfPnsqbWlRMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQlOlx7mzmzKWY37M8zl2u6Mqu7dW1
XZf9rjW9eLqX7Q5Rl3l43VZpYWdvm+40kw59bPRZzy9OuoPuZjv0vLBcvzz/thP7qmvn6lsY5DHU5TUemiwXq2tdOQEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNyVLK+HaW+b8R8d8r/NW9EfHNsTXCejK7YTO/4TK7YduM8/uxUsp9NYVjDSc3bSJzqZSyY9J90J3ZDZv5DZfZDZv53Zof6wAATRFOAICmtBJOHp10A6ya2Q2b+Q2X2Q2b+d1CE585AQD4vlaunAAARIRwAgA0RjgBAJoinAAATRFOAICm/D8fhHaj1xK2vAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 662.4x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "arr2 = mdv.transform(mols, misses=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a98362f98>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAicAAAECCAYAAAAl2XfFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAEe5JREFUeJzt3WGMXWlZB/DnYWZ30HUJrLs0ZMoIYxrCftA17WxI8cPailmQCCaaQG1CkzZrUkkw0Rj0C2piox8UPpSYrO2GTaooUZGNIeqmQ4MfDO1URl2ChJViqbvZlQBp3Zbuln390EvaLdP2PTNz7n3PzO+XNDP39sk5z97nnLv/nrnzniylBABAK1416QYAAK4nnAAATRFOAICmCCcAQFOEEwCgKcIJANCUiYaTzHw4M7+SmU9n5ocn2Qu3l5mPZebzmfnUdc/dk5lPZuZXR19fN8keWVlmvjEzP5eZX87ML2Xmh0bPm98AZOarM/NkZv7baH6/N3r+zZn5hdH8/ioz75x0r6wsM6cy84uZ+fejx2Z3CxMLJ5k5FREfj4h3RsT9EfH+zLx/Uv1Q5RMR8fANz304Io6XUrZFxPHRY9pzJSJ+o5Ty1oh4W0T82uh8M79huBwRu0opPxkRD0TEw5n5toj4o4j46Gh+346I/RPskVv7UER8+brHZncLk7xy8mBEPF1K+Vop5cWI+MuIeM8E++E2Simfj4hv3fD0eyLi8dH3j0fEe8faFFVKKc+WUv519P2FuPomORvmNwjlqv8bPbxj9KdExK6I+OvR8+bXqMzcGhE/HxFHRo8zzO6WJhlOZiPiG9c9Pjd6jmHZUkp5NuLq/wAj4vUT7ofbyMw3RcRPRcQXwvwGY/RjgeWIeD4inoyI/4qI75RSroxKvIe262MR8VsR8fLo8Y+G2d3SJMNJrvCctfShR5n5IxHxNxHx66WU85Puh3qllO+VUh6IiK1x9crzW1cqG29X3E5mvjsini+lnL7+6RVKze460xPc97mIeON1j7dGxDMT6oXVey4z31BKeTYz3xBX/1VHgzLzjrgaTP68lPK3o6fNb2BKKd/JzBNx9bNDr83M6dG/wL2HtuntEfELmfmuiHh1RLwmrl5JMbtbmOSVk1MRsW30ieU7I+J9EfHEBPthdZ6IiA+Mvv9ARHxmgr1wE6OfcR+NiC+XUv7kur8yvwHIzPsy87Wj738oIn42rn5u6HMR8UujMvNrUCnlt0spW0spb4qr/59bLKX8SpjdLeUk70o8SpIfi4ipiHislPIHE2uG28rMT0bEQxFxb0Q8FxEfiYi/i4hPRcRcRJyNiF8updz4oVkmLDN/OiL+OSL+I6793Pt34urnTsyvcZn5E3H1Q5NTcfUflZ8qpfx+Zs7H1V8muCcivhgRe0splyfXKbeSmQ9FxG+WUt5tdrc20XACAHAjK8QCAE0RTgCApggnAEBThBMAoCnCCQDQlCbCSWY+MukeWB2zGzbzGy6zGzbzu7UmwklEGNJwmd2wmd9wmd2wmd8ttBJOAAAiYsyLsE3dfVeZvu+1P/D8yxdeiFfdfdcrnrvzmyvdF6ltV+6q73n6hY2x+N1LL70Qd9xx1+0LmaibHZtXLr4Q0z/8g/Pr6/jcjOfIzbx4b/1/30rvh5M49/qa31ve/M3q2q+cube6tmU3m99GPke++91vx0svvlD1HzjWG/9N3/fa2HroYFXt3JGpnrtZf88tzFTXbjlllWLGp8uxGdHf8ekcuebsge9V17byftjX/I4fO1pdu3vv/uraIdrI58jSycPVtWv6sU5mPpyZX8nMpzPzw2vZFgBAxBrCSWZORcTHI+KdEXF/RLw/M+9fr8YAgM1pLVdOHoyIp0spXyulvBhX7674nvVpCwDYrNYSTmYj4hvXPT43eg4AYNXWEk5W+sTtD3x0ODMfycylzFx6+cILa9gdALAZrCWcnIuIN173
eGtEPHNjUSnl0VLKjlLKjht/XRgA4EZrCSenImJbZr45M++MiPdFxBPr0xYAsFmtep2TUsqVzPxgRPxjRExFxGOllC+tW2cAwKa0pkXYSimfjYjP1tbPnLkU83uW17LLdXFl1/bq2q6LV9WaXjzdy3aHqMs8vG6rtLCzt013mkmHPjb6rOcXJ91Bd7Mdel5Yrl9kbtuJfdW1c/UtDPIY6vIaD02Wi9W17q0DADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8uS91vOXW5x07glbrehqGv49M5cs3ZA/XLu7fyftjX/I4fO1pdu3vv/uraIdrI58jSycNx4fy5rKl15QQAaIpwAgA0RTgBAJoinAAATRFOAICmCCcAQFOEEwCgKcIJANAU4QQAaIpwAgA0RTgBAJoyPc6dzZy5FPN7lse5yxVd2bW9urbrPUlqTS+e7mW7Q9RlHl63VVrY2dumO82kQx8bfdbzi5PuoLvZDj0vLNffO2jbiX3VtXP1LQzyGOryGg9NlovVta6cAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKZkKWVsO5uZny1bDx2sqp07MtVzN+uvy1L3W05d7rETeKWut2Ho6/h0jlxz9kD98u6tvB/2Nb/jx45W1+7eu7+6dog28jmydPJwXDh/LmtqXTkBAJoinAAATRFOAICmCCcAQFOEEwCgKcIJANAU4QQAaIpwAgA0RTgBAJoinAAATZke585mzlyK+T3L49zliq7s2l5d23XZ71rTi6d72e4QdZmH122VFnb2tulOM+nQx0af9fzipDvobrZDzwvL9cvzbzuxr7p2rr6FQR5DXV7joclysbrWlRMAoCnCCQDQlDX9WCczvx4RFyLiexFxpZSyYz2aAgA2r/X4zMnPlFK+uQ7bAQDwYx0AoC1rDSclIv4pM09n5iMrFWTmI5m5lJlLL8XlNe4OANjo1vpjnbeXUp7JzNdHxJOZ+Z+llM9fX1BKeTQiHo2IeE3eU9a4PwBgg1vTlZNSyjOjr89HxKcj4sH1aAoA2LxWHU4y867MvPv730fEz0XEU+vVGACwOa3lxzpbIuLTmfn97fxFKeUf1qUrAGDTylLG9zGQmfnZsvXQwarauSNTPXez/rosdb/llA8HMz5db8PQ1/HpHLnm7IH65d1beT/sa37Hjx2trt29d3917RBt5HNk6eThuHD+XNbU+lViAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0JS13Funs5kzl2J+z/I4d7miK7u2V9d2Xfa71vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL98vzbTuyrrp2rb2GQx1CX13hoslysrnXlBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8u9+HZcupyj53AK3W9R1Rfx6dz5JqzB+rvPdPK+2Ff8zt+7Gh17e69+6trh2gjnyNLJw/HhfPnsqbWlRMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQlOlx7mzmzKWY37M8zl2u6Mqu7dW1XZf9rjW9eLqX7Q5Rl3l43VZpYWdvm+40kw59bPRZzy9OuoPuZjv0vLBcvzz/thP7qmvn6lsY5DHU5TUemiwXq2tdOQEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNyVLK2HY2Mz9bth46WFU7d2Sq527WX5el7recutxjJ/BKXW/D0Nfx6Ry55uyB+uXdW3k/7Gt+x48dra7dvXd/de0QbeRzZOnk4bhw/lzW
1LpyAgA05bbhJDMfy8znM/Op6567JzOfzMyvjr6+rt82AYDNoubKySci4uEbnvtwRBwvpWyLiOOjxwAAa3bbcFJK+XxEfOuGp98TEY+Pvn88It67zn0BAJvUaj9zsqWU8mxExOjr69evJQBgM+v9A7GZ+UhmLmXm0ssXXuh7dwDAwK02nDyXmW+IiBh9ff5mhaWUR0spO0opO151912r3B0AsFmsNpw8EREfGH3/gYj4zPq0AwBsdjW/SvzJiPiXiHhLZp7LzP0R8YcR8Y7M/GpEvGP0GABgzaZvV1BKef9N/mr3OvcCAHD7cLKeZs5civk9y+Pc5Yqu7NpeXdt12e9a04une9nuEHWZh9dtlRZ29rbpTjPp0MdGn/X84qQ76G62Q88Ly/XL8287sa+6dq6+hUEeQ11e46HJcrG61vL1AEBThBMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoClZShnbzmbmZ8vWQweraueOTPXczfrrstT9llOXe+wEXqnrbRj6Oj6dI9ecPVC/vHsr74d9ze/4saPVtbv37q+uHaKNfI4snTwcF86fy5paV04AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKAp0+Pc2cyZSzG/Z3mcu1zRlV3bq2u73pOk1vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL9vYO2ndhXXTtX38Igj6Eur/HQZLlYXevKCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQFOEEAGhKllLGtrOZ+dmy9dDBqtq5I1M9d7P+uix1v+XU5R47gVfqehuGvo5P58g1Zw/UL+/eyvthX/M7fuxode3uvfura4doI58jSycPx4Xz57Km1pUTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0JTpce5s5sylmN+zPM5drujKru3VtV2X/a41vXi6l+0OUZd5eN1WaWFnb5vuNJMOfWz0Wc8vTrqD7mY79LywXL88/7YT+6pr5+pbGOQx1OU1HposF6trXTkBAJpy23CSmY9l5vOZ+dR1z/1uZv5PZi6P/ryr3zYBgM2i5srJJyLi4RWe/2gp5YHRn8+ub1sAwGZ123BSSvl8RHxrDL0AAKzpMycfzMx/H/3Y53Xr1hEAsKmtNpz8aUT8eEQ8EBHPRsQf36wwMx/JzKXMXHopLq9ydwDAZrGqcFJKea6U8r1SyssR8WcR8eAtah8tpewopey4I/r5tVwAYONYVTjJzDdc9/AXI+Kpm9UCAHRx20XYMvOTEfFQRNybmeci4iMR8VBmPhARJSK+HhG/2mOPAMAmcttwUkp5/wpPH+2hFwCAyFLK2HY2Mz9bth46WFU7d2Sq527WX5el7rec8uFgxqfrbRj6Oj6dI9ecPVC/vHsr74d9ze/4sfp/7+7eu7+6dog28jmydPJwXDh/LmtqLV8PADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmnLbe+usp5kzl2J+z/I4d7miK7u2V9d2Xfa71vTi6V62O0Rd5uF1W6WFnb1tutNMOvSx0Wc9vzjpDrqb7dDzwnL98vzbTuyrrp2rb2GQx1CX13hoslysrnXlBABoinACADRFOAEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmpKllLHtbGZ+tmw9dLCqdu7IVM/drL8u9+HZcupyj53AK3W9R1Rfx6dz5JqzB+rvPdPK+2Ff8zt+7Gh17e69+6trh2gjnyNLJw/HhfPnsqbWlRMAoCnCCQDQFOEEAGiKcAIANEU4AQCaIpwAAE0RTgCApggnAEBThBMAoCnCCQDQlOlx7mzmzKWY37M8zl2u6Mqu7dW1
XZf9rjW9eLqX7Q5Rl3l43VZpYWdvm+40kw59bPRZzy9OuoPuZjv0vLBcvzz/thP7qmvn6lsY5DHU5TUemiwXq2tdOQEAmiKcAABNEU4AgKYIJwBAU4QTAKApwgkA0BThBABoinACADRFOAEAmiKcAABNyVLK+HaW+b8R8d8r/NW9EfHNsTXCejK7YTO/4TK7YduM8/uxUsp9NYVjDSc3bSJzqZSyY9J90J3ZDZv5DZfZDZv53Zof6wAATRFOAICmtBJOHp10A6ya2Q2b+Q2X2Q2b+d1CE585AQD4vlaunAAARIRwAgA0RjgBAJoinAAATRFOAICm/D8fhHaj1xK2vAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 662.4x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(arr2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hashed Fingerprints"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Morgan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen.vectorizers import HashedMorganVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "hmv = HashedMorganVectorizer(nBits=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a983b9c18>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6IAAAB8CAYAAABzJA1FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAEGJJREFUeJzt3U+obVd9B/Dvr7EaIoYaYkJapYikhRTah4RYEEpE1OgkCi2YQRuK5Tkw86YjHTqoCEUrPGlIHKhIIZhB8E+dZNJSIwQbpdZgUxsT8iqWIgi2iauDe159vZ7rOW/fvdfa597PB8J99+SctdZZZ6199rp7/X67WmsBAACAXn5ldAMAAAA4XyxEAQAA6MpCFAAAgK4sRAEAAOjKQhQAAICuLEQBAADoqvtCtKruqarvVNUzVfVg7/rhPKmqZ6vqn6rqqap6cvPYTVX11ar67ubna0e3Ew5dVT1UVZer6umrHts61+rIX22+B79ZVW8e13I4bCfMvY9U1Q82331PVdV7rvp/f7GZe9+pqneNaTWQdF6IVtV1ST6Z5N1J7khyX1Xd0bMNcA69rbV2obV25+b3B5N8rbV2e5KvbX4HTufhJPcce+ykufbuJLdv/ruY5FOd2ghn0cP5xbmXJB/ffPddaK09niSbc873J/mdzWv+enNuCgzQ+4roXUmeaa19r7X230k+n+Tezm2A8+7eJI9s/v1IkvcObAucCa21J5L86NjDJ821e5N8ph35hyS/VlW39WkpnC0nzL2T3Jvk8621n7bW/jXJMzk6NwUG6L0Q/Y0k/37V789tHgOW0ZJ8paq+UVUXN4/d2lp7IUk2P28Z1jo4206aa74LYXkPbLa+P3RVCIq5ByvSeyFaWx5rndsA58lbW2tvztFWwA9V1R+MbhDguxAW9qkkb0pyIckLST62edzcgxXpvRB9Lskbrvr99Ume79wGODdaa89vfl5O8miOtiC9eGUb4Obn5XEthDPtpLnmuxAW1Fp7sbX2cmvtZ0k+nZ9vvzX3YEV6L0S/nuT2qnpjVb0yRwHjj3VuA5wLVfXqqnrNlX8neWeSp3M05+7fPO3+JF8c00I4806aa48l+ZNN9tzfT/JfV7bwAqd3LOb6fTn67kuO5t77q+pVVfXGHCUM+8fe7QOOvKJnZa21l6rqgSRfTnJdkodaa9/q2QY4R25N8mhVJUdz/bOttS9V1deTfKGqPpDk+0n+aGAb4Uyoqs8luTvJzVX1XJIPJ/lots+1x5O8J0eJUn6S5E+7NxjOiBPm3t1VdSFH226fTfLBJGmtfauqvpDk20leSvKh1trLI9oNJNWarfEAAAD003trLgAAAOechSgAAABdWYgCAADQlYUoAAAAXVmIAgAA0NWQhWhVXRxRL5x35h6MY/7BGOYerNOpFqJVdU9VfaeqnqmqB6/hpQ4IMIa5B+OYfzCGuQcrNHkhWlXXJflkkncnuSPJfVV1x1wNAwAA4Gx6xSlee1eSZ1pr30uSqvp8knuTfPukF7yyXtWuz6tzfW7IjXVT26eS3/rdn+x8zr9884b9WtzJPm3eR8/3dbzN+9S97X3uet2U1yxZznnzy+aePj29XfNodB9POTYt1b4l+6Ln+7yWY+e1fPctpecYnPK9clb0OhZMLWeuz+ZQPuPTzr05zpFGH+vnGl9zlLs2PefRXN9Pvc7bT3rdrnJ+nP/8YWvtdbteV61Nm5dV9YdJ7mmt/dnm9z9O8pbW2gMnvebGuqm9pd5+TfV8+fmndj7nXb9+4ZrKXNo+bd5Hz/d1vM371L3tfe563ZTXLFkOP6dPT2/XPBrdx1OOTUu1b8m+6Pk+pxw7R+o5Bg+tb+bU61gwtZy5Ppvz8hnPcY40+lg/1/iao9y16TmP5vp+6nXeftLrdpXzd+1vv9Fau3PX605zRbS2PPYLq9pNgPjF5OgvUgAAAJxvp1mIPpfkDVf9
/vokzx9/UmvtUpJLSWbbkrT2v77s85fPXa/prddfa+d6n1P/OnRe/no7xdqufPX8a99IPcfk2vtiH1P6q2efOsaQTDsPmKOeqaYeO9d0lW30VeaRlhpfPR3isXNKG5e6Ajm17n36fcnP4jRZc7+e5PaqemNVvTLJ+5M8Nk+zAAAAOKsmXxFtrb1UVQ8k+XKS65I81Fr71mwtAwAA4Ew6zdbctNYeT/L4TG0BAADgHDjN1lwAAAC4Zqe6IjrK2gOapwSNjw6O75VUaHQygbWNlTVZagxOLWOpJABrMzql/y5r69M1JUfZVvba+osxpty+ZY56ttW1j57jf6k5MjoZ4ki9kmMtaW19uo9et29Zsu6RyZMSV0QBAADozEIUAACArixEAQAA6OogY0TXvo98yl790e+pVyzn6BiOtcd5jLRUX8wVk7Bk/MNIPcfk2vtiH2uKSdtWtmMMSb+YvbnGV8+8C0udE43OQTHSIcaEHneIx86l4rGXmiNTz4uX/CxcEQUAAKArC1EAAAC6shAFAACgKwtRAAAAujrIZEVrD2ieEjQ+Oji+V1Kh0ckE1jZW1mSpMTi1jJ43ZR9p5Dzfx9r6dE3JUbaVvbb+Yoxd42Kpm9pvq2sfPcf/UnNkdDLEkXolx1rS2vp0H3MlVRxZ98jkSYkrogAAAHRmIQoAAEBXFqIAAAB0daoY0ap6NsmPk7yc5KXW2p1zNGqXte8jn7JXf/R76hXLOTqGY+1xHiMt1RdzxSQsGf8wUs8xufa+2MeaYtK2le0YQ9IvZm+u8dUz78JS50Sjc1CMdIgxoccd4rFzqXjspebI1PPiJT+LOZIVva219sMZygEAAOAcsDUXAACArk67EG1JvlJV36iqi3M0CAAAgLPttFtz39pae76qbkny1ar659baE1c/YbNAvZgk1+eGU1YHAADAoTvVQrS19vzm5+WqejTJXUmeOPacS0kuJcmNdVM7TX1XrD2geUrQ+Ojg+F5JhUYnE1jbWFmTpcbg1DJ63pR9pJHzfB9r69M1JUfZVvba+osxdo2LpW5qv62uffQc/0vNkdHJEEfqlRxrSWvr033MlVRxZN0jkyclp9iaW1WvrqrXXPl3kncmeXpqeQAAAJwPp7kiemuSR6vqSjmfba19aZZWAQAAcGZNXoi21r6X5PdmbAsAAADnwBz3Ee1u7fvIp+zVH/2eesVyjo7hWHucx0hL9cVcMQlLxj+M1HNMrr0v9rGmmLRtZTvGkPSL2ZtrfPXMu7DUOdHoHBQjHWJM6HGHeOxcKh57qTky9bx4yc/CfUQBAADoykIUAACArixEAQAA6MpCFAAAgK4OMlnR2gOapwSNjw6O75VUaHQygbWNlTVZagxOLaPnTdlHGjnP97G2Pl1TcpRtZa+tvxhj17hY6qb22+raR8/xv9QcGZ0McaReybGWtLY+3cdcSRVH1j0yeVLiiigAAACdWYgCAADQlYUoAAAAXR1kjOja95FP2as/+j31iuUcHcOx9jiPkZbqi7liEpaMfxip55hce1/sY00xadvKdowh6RezN9f46pl3YalzotE5KEY6xJjQ4w7x2LlUPPZSc2TqefGSn4UrogAAAHRlIQoAAEBXFqIAAAB0tXMhWlUPVdXlqnr6qsduqqqvVtV3Nz9fu2wzAQAAOCv2SVb0cJJPJPnMVY89mORrrbWPVtWDm9//fP7mbbf2gOYpQeOjg+N7JRUanUxgbWNlTZYag1PL6HlT9pFGzvN9rK1P15QcZVvZa+svxtg1Lpa6qf22uvbRc/wvNUdGJ0McqVdyrCWtrU/3MVdSxZF1j0yelOxxRbS19kSSHx17+N4kj2z+/UiS906qHQAAgHNnaozora21F5Jk8/OW+ZoEAADAWbb4fUSr6mKSi0lyfW5YujoAAABWbupC9MWquq219kJV3Zbk8klPbK1dSnIpSW6sm9rE+v6fte8jn7JXf/R76hXLOTqGY+1xHiMt1RdzxSQsGf8wUs8x
ufa+2MeaYtK2le0YQ9IvZm+u8dUz78JS50Sjc1CMdIgxoccd4rFzqXjspebI1PPiJT+LqVtzH0ty/+bf9yf54jzNAQAA4Kzb5/Ytn0vy90l+u6qeq6oPJPlokndU1XeTvGPzOwAAAOy0c2tua+2+E/7X22duCwAAAOfA1K25AAAAMMniWXOXsPaA5ilB46OD43slFRqdTGBtY2VNlhqDU8voeVP2kUbO832srU/XlBxlW9lr6y/G2DUulrqp/ba69tFz/C81R0YnQxypV3KsJa2tT/cxV1LFkXWPTJ6UuCIKAABAZxaiAAAAdGUhCgAAQFcHGSO69n3kU/bqj35PvWI5R8dwrD3OY6Sl+mKumIQl4x9G6jkm194X+1hTTNq2sh1jSPrF7M01vnrmXVjqnGh0DoqRDjEm9LhDPHYuFY+91ByZel685GfhiigAAABdWYgCAADQlYUoAAAAXVmIAgAA0NVBJitae0DzlKDx0cHxvZIKjU4msLaxsiZLjcGpZfS8KftII+f5PtbWp2tKjrKt7LX1F2PsGhdL3dR+W1376Dn+l5ojo5MhjtQrOdaS1tan+5grqeLIukcmT0pcEQUAAKAzC1EAAAC62rkQraqHqupyVT191WMfqaofVNVTm//es2wzAQAAOCv2iRF9OMknknzm2OMfb6395ewt2sPa95FP2as/+j31iuUcHcOx9jiPkZbqi7liEpaMfxip55hce1/sY00xadvKdowh6RezN9f46pl3YalzotE5KEY6xJjQ4w7x2LlUPPZSc2TqefGSn8XOK6KttSeS/GixFgAAAHCunCZG9IGq+uZm6+5rZ2sRAAAAZ9rUheinkrwpyYUkLyT52ElPrKqLVfVkVT35P/npxOoAAAA4KyYtRFtrL7bWXm6t/SzJp5Pc9Uuee6m1dmdr7c5fzaumthMAAIAzYp9kRb+gqm5rrb2w+fV9SZ7+Zc+f29oDmqcEjY8Oju+VVGh0MoG1jZU1WWoMTi2j503ZRxo5z/extj5dU3KUbWWvrb8YY9e4WOqm9tvq2kfP8b/UHBmdDHGkXsmxlrS2Pt3HXEkVR9Y9MnlSssdCtKo+l+TuJDdX1XNJPpzk7qq6kKQleTbJByfVDgAAwLmzcyHaWrtvy8N/s0BbAAAAOAdOkzUXAAAArtmkGNHR1r6PfMpe/dHvqVcs5+gYjrXHeYy0VF/MFZOwZPzDSD3H5Nr7Yh9riknbVrZjDEm/mL25xlfPvAtLnRONzkEx0iHGhB53iMfOpeKxl5ojU8+Ll/wsXBEFAACgKwtRAAAAurIQBQAAoCsLUQAAALo6yGRFaw9onhI0Pjo4vldSodHJBNY2VtZkqTE4tYyeN2UfaeQ838fa+nRNyVG2lb22/mKMXeNiqZvab6trHz3H/1JzZHQyxJF6Jcda0tr6dB9zJVUcWffI5EmJK6IAAAB0ZiEKAABAVxaiAAAAdHWQMaJr30c+Za/+6PfUK5ZzdAzH2uM8RlqqL+aKSVgy/mGknmNy7X2xjzXFpG0r2zGGpF/M3lzjq2fehaXOiUbnoBjpEGNCjzvEY+dS8dhLzZGp58VLfhauiAIAANCVhSgAAABdWYgCAADQlYUoAAAAXVVrrV9lVf+R5N+S3Jzkh90qBq4w92Ac8w/GMPegr99srb1u15O6LkT/r9KqJ1trd3avGM45cw/GMf9gDHMP1snWXAAAALqyEAUAAKCrUQvRS4PqhfPO3INxzD8Yw9yDFRoSIwoAAMD5ZWsuAAAAXVmIAgAA0JWFKAAAAF1ZiAIAANCVhSgAAABd/S+RRrJ3aFIfKwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1152x144 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(hmv.transform(mols))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hashed atom pair fingerprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from molvecgen.vectorizers import HashedAPVectorizer\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "hmv = HashedAPVectorizer(nBits=100, augment=True, minLength=4, maxLength=8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'minLength': 4, 'maxLength': 8}"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hmv.kwargs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a98413438>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6IAAADVCAYAAABNN01jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAHQVJREFUeJzt3X+sn9d5EPDnNI6vqb3Ite/quLfd3JKGUSHWBbvZ7AmVBaGunmiQmFgFqEQkJhIJI+JHCgkaKJlEJGCb8LQoabtVCDomszELVRtNM6nohliO0wQK9dLKuG1s96Y3naHpiB27hz98O9w0+Z7Hvqfnfu+9n49U2fd+n77n+b7vec95n3x971NqrQEAAACjvGGlEwAAAGB9UYgCAAAwlEIUAACAoRSiAAAADKUQBQAAYCiFKAAAAEOtWCFaSnlfKeX3SylfLKV8eKXygCtRSnlbKeX3SimfL6X8j1LKzy59f1sp5VOllC8s/fmmlc4VMkop15RSPltK+U9LX7+9lHJkaS7/+1LKxpXOEVpKKVtLKYdKKceX1ucfsy6z2pRS7ll6tvhcKeUTpZRN1mTWshUpREsp10TEL0fET0bEuyLig6WUd61ELnCFLkTE36u1/smI+NGI+NtLc/fDEfHpWus7I+LTS1/DavCzEfH5y75+KCJ+YWku/0FE/M0VyQquzC9FxO/UWn8oIn44Ls1p6zKrRillLiL+TkTsrrX+qYi4JiJ+JqzJrGEr9YnoeyLii7XWE7XW8xHx6xHxgRXKBdJqrWdqrU8v/f0bcelhZy4uzd+PL4V9PCJuXZkMIa+U8taI2B8RH1n6ukTET0TEoaUQc5mpV0q5LiL+bER8NCKi1nq+1no2rMusPhsi4o+VUjZExBsj4kxYk1nDVqoQnYuIr1z29fNL34NVo5SyKyJ+JCKORMSOWuuZiEvFakS8eeUyg7RfjIh/GBHfWvp6e0ScrbVeWPra2sxq8I6I+FpE/OrSPzP/SCllc1iXWUVqraci4l9ExJfjUgH6vyPiWFiTWcNWqhAtr/G9OjwLuEqllC0R8R8i4u/WWv/PSucDV6qU8lMR8UKt9djl336NUGsz025DRNwUEb9Sa/2RiPhm+Ge4rDJLP8P8gYh4e0S8JSI2x6UfYXs1azJrxkoVos9HxNsu+/qtEXF6hXKBK1JKuTYuFaH/ttb6m0vfXiil7Fx6fWdEvLBS+UHSvoj4i6WUk3HpxyN+Ii59Qrp16Z+FRVibWR2ej4jna61Hlr4+FJcKU+syq8mfj4j/VWv9Wq31lYj4zYjYG9Zk1rCVKkSPRsQ7l34T2Ma49MPYh1coF0hb+hm6j0bE52ut/+qylw5HxIeW/v6hiPjt0bnBlai1/qNa61trrbvi0hr8eK31r0bE70XEX14KM5eZerXWr0bEV0opf2LpW7dExP8M6zKry5cj4kdLKW9cetb49jy2JrNmlVpX5hP+Usr749J/fb8mIj5Wa/35FUkErkAp5ccj4r9ExH+P//9zdf84Lv2c6G9ExA/Epc3kp2utX1+RJOEKlVLeGxF/v9b6U6WUd8SlT0i3RcRnI+Kv1VrPrWR+0FJKeXdc+qVbGyPiRETcFpf+Y7t1mVWjlPLPIuKvxKXf0P/ZiLg9Lv1MqDWZNWnFClEAAADWp5X6p7kAAACsUwpRAAAAhlKIAgAAMJRCFAAAgKEUogAAAAy1ooVoKeXASo4PvZjLrBXmMmuBecxaYS6zlq30J6JuLtYKc5m1wlxmLTCPWSvMZdaslS5EAQAAWGdKrXXYYBs2ba4zW7b90dcXXv5mbNi0+TtjXnqleZx67nz33Jbj4vbNE1+/5sVvDsqkn9Z7ioh4w/n23Cnf+MMuY03bOazf98bv+PqVV74Z1177ne/jLT+4OPEYX178/uY41361/b7LzMZmTK97JjNWxstvuaYZ84b/2/7vZJnzkzHqHL563ryW
87Pt+2rT6Yvtsa4y31fiXFwbM3/09Y1/un0Pf+H3tzZjXn7ztc2YTS9Y/5ej1zzudZyRa/urcz5/8Q9j4zXfeb+NfO+jvHJ9+xz3WidXo8z1vP6dZ5sxp7802x5r5+T1q3ypNI/xWl5rLrd8a+OGZkzm+SyzZ2WOM23PKb3Gytx/LdP2nNdr3f5G/MFirbX5oNueqROUUt4XEb8UEddExEdqrf98UvzMlm3xrv33TDzm7PyZ5rgXTpzMJznA2f0/NvH1rf/mvw7KpJ/We4qI2HKqPek3PH6sy1jTdg4vvOfPNGP+yaO/OvH1ux65s3mMuYeeaMZseOuuZkyveyYzVsZzD7YLl5ln2xtg5vxkjDqHmXnz5dvbReaN97cfnHpd89/93WeaMft//NZmzPG7r2/G/NC//mozxvr/+nrN417HGbm2T9t7H+XU39jbjOm1Tq5Gmev5Dw4fbsY8cMdt7bHuW5j8+h3LeuS+Ii/v2t6MyTyfZfaszHGm7Tml11iZ+69l2p7zeq3bj9VDX8qMd9X/NLeUck1E/HJE/GREvCsiPlhKedfVHg8AAID1YTk/I/qeiPhirfVErfV8RPx6RHygT1oAAACsVcspROci4iuXff380ve+QynlQCnlqVLKUxdeXr8/pwAAAMAlyylEX+unrr/rt2zUWh+pte6ute5+9S8mAgAAYP1ZTiH6fES87bKv3xoRp5eXDgAAAGvdcgrRoxHxzlLK20spGyPiZyKi/avHAAAAWNeW1Ue0lPL+iPjFuNS+5WO11p+fFH9d2VZvLrdc9Xi9bXjHrmZMj1+H3GucXsc5+9f7/GrmTD6Zlg2zT7d7a/VqFTPSqXsn/1rvHUfPNY8x8j1d+In2r2nfdPLFZszivp3NmNXYpmmUUetSRO6avzTX7l+WWS8y605mXvSSOYeZ89PjHp22PWKkXjmPulajx2rJ5JKRasExZdcqs6Zknh0W9sw0YzL7dUtm/1zP93nGtJ2f1lzOzK212l7psXroWK11dytuWU2Naq2fjIhPLucYAAAArC/L+ae5AAAAcMUUogAAAAylEAUAAGAohSgAAABDKUQBAAAYSiEKAADAUApRAAAAhlpWH9ErdXH75ji7f3ID4pGN7nsdp9VUOdPsPaNXvr2az2caRd9wz5NdxhrVHDwr05D74IGHJ75+V9zZPMbc4+1cejVvTp3jxFiZe/i5B7c2Y2ae3duM6dUIelQD7My8OXH7xWbMjffvao/V6ZofOf1MM2b//K3NmMWbajNmdr4ZMrRRe+b89Fj/V2Pz+UzOmbW91/646eSLzZgLieNk3ntM0X60sGemGdNrnew1T3vt55m5k7meBx893Ix54Oht7bHuW5gccEf7kTs1/xIye03mOvQ6Tsa0rYMZmfuvh1HPKBFj1+0In4gCAAAwmEIUAACAoRSiAAAADKUQBQAAYCiFKAAAAEMpRAEAABhKIQoAAMBQClEAAACGKrW2m4z3cl3ZVm8utwwbr2VUg9he40xbk/FMPsfvvr4ZM/t0acZsOXW+nc8UNRmPiDh1796Jr+84eq55jJHvKdO4OtM0fnHfzmbM7PyZdj6rsLl1DyMbV2eu+UtzG5sxmfUis+5k5kUvmXM4qpn7tO0RI/XKedS1Gj1WSyaXjEy+03atMmtK5tlhYc9MMyazX7dk9s/1fJ9nTNv5ac3lzNyae+iJLrlMm8fqoWO11t2tOJ+IAgAAMJRCFAAAgKEUogAAAAylEAUAAGAohSgAAABDKUQBAAAYSiEKAADAUApRAAAAhtowcrCL2zfH2f2TGxCPbHTf6zitpsqZZu8ZvfLt1Xw+0yj6hnue7DLWqObgWZmG3AcPPDzx9bvizuYx5h5v59KreXPqHCfGytzDzz24tRkz8+zeZkyvRtCjGmBn5s2J2y82Y268f1d7rE7X/MjpZ5ox++dvbcYs3lSbMbPzzZChjdoz56fH+r8am89ncs6s7b32x00nX2zGXEgcJ/PeY4r2
o4U9M82YXutkr3naaz/PzJ3M9Tz46OFmzANHb2uPdd/C5IA72o/cqfmXkNlrMteh13Eypm0dzMjcfz2MekaJGLtuR/hEFAAAgMEUogAAAAylEAUAAGAohSgAAABDKUQBAAAYSiEKAADAUApRAAAAhlKIAgAAMFSptd1kvJfryrZ6c7ll2HgtoxrE9hpn2pqMZ/I5fvf1zZjZp0szZsup8+18pqjJeETEqXv3Tnx9x9FzzWOMfE+ZxtWZpvGL+3Y2Y2bnz7TzWYXNrXsY2bg6c81fmtvYjMmsF5l1JzMvesmcw1HN3KdtjxipV86jrtXosVoyuWRk8p22a5VZUzLPDgt7Zpoxmf26JbN/ruf7PGPazk9rLmfm1txDT3TJZdo8Vg8dq7XubsVtWM4gpZSTEfGNiLgYERcyAwIAALC+LasQXfLnaq2LHY4DAADAOuBnRAEAABhquYVojYj/XEo5Vko50CMhAAAA1rbl/tPcfbXW06WUN0fEp0opx2utn7k8YKlAPRARsSneuMzhAAAAWO2W9YlorfX00p8vRMRvRcR7XiPmkVrr7lrr7muj/dujAAAAWNuuuhAtpWwupXzft/8eEX8hIj7XKzEAAADWpuX809wdEfFbpZRvH+ff1Vp/p0tWAAAArFlXXYjWWk9ExA9fyf/n4vbNcXb/5AbEIxvd9zpOq6lyptl7Rq98ezWfzzSKvuGeJ7uMNao5eFamIffBAw9PfP2uuLN5jLnH27n0at6cOseJsTL38HMPbm3GzDy7txnTqxH0qAbYmXlz4vaLzZgb79/VHqvTNT9y+plmzP75W5sxizfVZszsfDNkaKP2zPnpsf6vxubzmZwza3uv/XHTyRebMRcSx8m895ii/WhhT/tHnnqtk73maa/9PDN3Mtfz4KOHmzEPHL2tPdZ9C5MD7mg/cqfmX0Jmr8lch17HyZi2dTAjc//1MOoZJWLsuh2hfQsAAACDKUQBAAAYSiEKAADAUApRAAAAhlKIAgAAMJRCFAAAgKEUogAAAAylEAUAAGCoUmu7yXgv15Vt9eZyy7DxWkY1iO01zrQ1Gc/kc/zu65sxs0+XZsyWU+fb+UxRk/GIiFP37p34+o6j55rHGPmeMo2rM03jF/ftbMbMzp9p57MKm1v3MLJxdeaavzS3sRmTWS8y605mXvSSOYejmrlP2x4xUq+cR12r0WO1ZHLJyOQ7bdcqs6Zknh0W9sw0YzL7dUtm/1zP93nGtJ2f1lzOzK25h57oksu0eaweOlZr3d2K84koAAAAQylEAQAAGEohCgAAwFAKUQAAAIZSiAIAADCUQhQAAIChFKIAAAAMpRAFAABgqA0jB7u4fXOc3T+5AfHIRve9jtNqqpxp9p7RK99ezeczjaJvuOfJLmONag6elWnIffDAwxNfvyvubB5j7vF2Lr2aN6fOcWKszD383INbmzEzz+5txvRqBD2qAXZm3py4/WIz5sb7d7XH6nTNj5x+phmzf/7WZsziTbUZMzvfDBnaqD1zfnqs/6ux+Xwm58za3mt/3HTyxWbMhcRxMu89pmg/Wtgz04zptU72mqe99vPM3Mlcz4OPHm7GPHD0tvZY9y1MDrij/cidmn8Jmb0mcx16HSdj2tbBjMz918OoZ5SIset2hE9EAQAAGEwhCgAAwFAKUQAAAIZSiAIAADCUQhQAAIChFKIAAAAMpRAFAABgKIUoAAAAQ5Va203Ge7mubKs3l1uGjdcyqkFsr3Gmrcl4Jp/jd1/fjJl9ujRjtpw6385nipqMR0ScunfvxNd3HD3XPMbI95RpXJ1pGr+4b2czZnb+TDufVdjcuoeRjasz1/yluY3NmMx6kVl3MvOil8w5HNXMfdr2iJF65TzqWo0eqyWTS0Ym32m7Vpk1JfPssLBnphmT2a9bMvvner7PM6bt/LTmcmZuzT30RJdcps1j9dCxWuvuVpxPRAEAABhKIQoAAMBQClEAAACGUogCAAAwlEIUAACAoRSiAAAA
DKUQBQAAYKgNIwe7uH1znN0/ue/TyP6CvY7T6mWV6bGX0SvfXj3/Mv25brjnyS5jTVuP0EwftIMHHp74+l1xZ/MYc4+3c+nWgzBzjhNjZe7h5x7c2oyZeXZyH9aIfv23RvUdy8ybE7dfbMbceP+u9lidrvmR0880Y/bP39qMWbyp3bN6dr4ZMrQ/Xo9ehpn1djX2/BvZszoj1aMxcZxUv8Mp2o9G9insNU977ee9+p0ffPRwM+aBo7e1x7pvYXLAHe1H7m79Njv1ah3an3fK1sGMzP3Xw8jeqCPX7QifiAIAADBYsxAtpXyslPJCKeVzl31vWynlU6WULyz9+abvbZoAAACsFZlPRH8tIt73qu99OCI+XWt9Z0R8eulrAAAAaGoWorXWz0TE11/17Q9ExMeX/v7xiGj/gBAAAADE1f+M6I5a65mIiKU/3/x6gaWUA6WUp0opT114+ZtXORwAAABrxff8lxXVWh+pte6ute7esGnz93o4AAAAptzVFqILpZSdERFLf77QLyUAAADWsqstRA9HxIeW/v6hiPjtPukAAACw1pVaJzcZL6V8IiLeGxGzEbEQET8XEf8xIn4jIn4gIr4cET9da331LzT6LteVbfXmcssyU+5nVIPYXuNMW5PxTD7H776+GTP7dGnGbDl1vp3PFDUZj4g4de/eia/vOHqueYyR7ynTuDrTNH5x385mzOz8mXY+q7C5dQ8jG1dnrvlLcxubMZn1IrPuZOZFL5lzOKqZ+7TtESP1ynnUtRo9Vksml4xMvtN2rTJrSubZYWHPTDMms1+3ZPbP9XyfZ0zb+WnN5czcmnvoiS65TJvH6qFjtdbdrbgNrYBa6wdf56XpqSgBAABYNb7nv6wIAAAALqcQBQAAYCiFKAAAAEMpRAEAABhKIQoAAMBQClEAAACGUogCAAAwVLOPaE8Xt2+Os/snNyAe2ei+13FaTZUzzd4zeuXbq/l8plH0Dfc82WWsUc3BszINuQ8eeHji63fFnc1jzD3ezqVX8+bUOU6MlbmHn3twazNm5tm9zZhejaBHNcDOzJsTt19sxtx4/672WJ2u+ZHTzzRj9s/f2oxZvKk2Y2bnmyFDG7Vnzk+P9X81Np/P5JxZ23vtj5tOvtiMuZA4Tua9xxTtRwt7ZpoxvdbJXvO0136emTuZ63nw0cPNmAeO3tYe676FyQF3tB+5U/MvIbPXZK5Dr+NkTNs6mJG5/3oY9YwSMXbdjvCJKAAAAIMpRAEAABhKIQoAAMBQClEAAACGUogCAAAwlEIUAACAoRSiAAAADKUQBQAAYKhSa7vJeC/XlW315nLLsPFaRjWI7TXOtDUZz+Rz/O7rmzGzT5dmzJZT59v5TFGT8YiIU/funfj6jqPnmscY+Z4yjaszTeMX9+1sxszOn2nnswqbW/cwsnF15pq/NLexGZNZLzLrTmZe9JI5h6OauU/bHjFSr5xHXavRY7VkcsnI5Dtt1yqzpmSeHRb2zDRjMvt1S2b/XM/3eca0nZ/WXM7MrbmHnuiSy7R5rB46Vmvd3YrziSgAAABDKUQBAAAYSiEKAADAUApRAAAAhlKIAgAAMJRCFAAAgKEUogAAAAylEAUAAGCoDSMHu7h9c5zdP7kB8chG972O02qqnGn2ntEr317N5zONom+458kuY41qDp6Vach98MDDE1+/K+5sHmPu8XYuvZo3p85xYqzMPfzcg1ubMTPP7m3G9GoEPaoBdmbenLj9YjPmxvt3tcfqdM2PnH6mGbN//tZmzOJNtRkzO98MGdqoPXN+eqz/q7H5fCbnzNrea3/cdPLFZsyFxHEy7z2maD9a2DPTjOm1Tvaap73288zcyVzPg48ebsY8cPS29lj3LUwOuKP9yJ2afwmZvSZzHXodJ2Pa1sGMzP3Xw6hnlIix63aET0QBAAAYTCEKAADAUApRAAAAhlKIAgAAMJRCFAAAgKEUogAAAAylEAUAAGAohSgAAABDlVrbTcZ7ua5sqzeX
W4aN1zKqQWyvcaatyXgmn+N3X9+MmX26NGO2nDrfzmeKmoxHRJy6d+/E13ccPdc8xsj3lGlcnWkav7hvZzNmdv5MO59V2Ny6h5GNqzPX/KW5jc2YzHqRWXcy86KXzDkc1cx92vaIkXrlPOpajR6rJZNLRibfabtWmTUl8+ywsGemGZPZr1sy++d6vs8zpu38tOZyZm7NPfREl1ymzWP10LFa6+5WnE9EAQAAGKpZiJZSPlZKeaGU8rnLvvdPSymnSinPLP3v/d/bNAEAAFgrMp+I/lpEvO81vv8LtdZ3L/3vk33TAgAAYK1qFqK11s9ExNcH5AIAAMA6sJyfEb2rlPLflv7p7pteL6iUcqCU8lQp5alXYvk/7A0AAMDqdrWF6K9ExB+PiHdHxJmI+JevF1hrfaTWurvWuvvaaP/2KAAAANa2qypEa60LtdaLtdZvRcSjEfGevmkBAACwVl1VIVpKubxR4F+KiM+9XiwAAABcbkMroJTyiYh4b0TMllKej4ifi4j3llLeHRE1Ik5GxN/KDHZx++Y4u39yA+KRje57HafVVDnT7D2jV769ms9nGkXfcM+TXcYa1Rw8K9OQ++CBhye+flfc2TzG3OPtXHo1b06d48RYmXv4uQe3NmNmnt3bjOnVCHpUA+zMvDlx+8VmzI3372qP1emaHzn9TDNm//ytzZjFm2ozZna+GTK0UXvm/PRY/1dj8/lMzpm1vdf+uOnki82YC4njZN57TNF+tLCn/SNPvdbJXvO0136emTuZ63nw0cPNmAeO3tYe676FyQF3NB+5c/MvIbPXZK5Dr+NkTNs6mJG5/3oY9YwSMXbdjkgUorXWD77Gtz/aLQMAAADWleX81lwAAAC4YgpRAAAAhlKIAgAAMJRCFAAAgKEUogAAAAylEAUAAGAohSgAAABDlVrbTcZ7ua5sqzeXW4aN1zKqQWyvcaatyXgmn+N3X9+MmX26NGO2nDrfzmeKmoxHRJy6d+/E13ccPdc8xsj3lGlcnWkav7hvZzNmdv5MO59V2Ny6h5GNqzPX/KW5jc2YzHqRWXcy86KXzDkc1cx92vaIkXrlPOpajR6rJZNLRibfabtWmTUl8+ywsGemGZPZr1sy++d6vs8zpu38tOZyZm7NPfREl1ymzWP10LFa6+5WnE9EAQAAGEohCgAAwFAKUQAAAIZSiAIAADCUQhQAAIChFKIAAAAMpRAFAABgKIUoAAAAQ20YOdjF7Zvj7P7JDYhHNrrvdZxWU+VMs/eMXvn2aj6faRR9wz1PdhlrVHPwrExD7oMHHp74+l1xZ/MYc4+3c+nVvDl1jhNjZe7h5x7c2oyZeXZvM6ZXI+hRDbAz8+bE7RebMTfev6s9VqdrfuT0M82Y/fO3NmMWb6rNmNn5ZsjQRu2Z89Nj/V+NzeczOWfW9l7746aTLzZjLiSOk3nvMUX70cKemWZMr3Wy1zzttZ9n5k7meh589HAz5oGjt7XHum9hcsAd7Ufu1PxLyOw1mevQ6zgZ07YOZmTuvx5GPaNEjF23I3wiCgAAwGAKUQAAAIZSiAIAADCUQhQAAIChFKIAAAAMpRAFAABgKIUoAAAAQylEAQAAGKrU2m4y3st1ZVu9udwybLyWUQ1ie40zbU3GM/kcv/v6Zszs06UZs+XU+XY+U9RkPCLi1L17J76+4+i55jFGvqdM4+pM0/jFfTubMbPzZ9r5rMLm1j2MbFydueYvzW1sxmTWi8y6k5kXvWTO4ahm7tO2R4zUK+dR12r0WC2ZXDIy+U7btcqsKZlnh4U9M82YzH7dktk/1/N9njFt56c1lzNza+6hJ7rkMm0eq4eO1Vp3t+J8IgoAAMBQClEAAACGUogCAAAwlEIUAACAoRSiAAAADKUQBQAAYCiFKAAAAEMpRAEAABiq1FrHDVbK1yLiS5d9azYiFoclAN875jJrhbnMWmAes1aYy6xGP1hr/f5W0NBC9LsGL+WpWuvuFUsAOjGXWSvMZdYC85i1
wlxmLfNPcwEAABhKIQoAAMBQK12IPrLC40Mv5jJrhbnMWmAes1aYy6xZK/ozogAAAKw/K/2JKAAAAOuMQhQAAIChFKIAAAAMpRAFAABgKIUoAAAAQ/0/hrcQULhL8WYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1152x230.4 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(hmv.transform(mols))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hashed topological torsion fingerprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molvecgen.vectorizers import HashedTorsionVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "hmv = HashedTorsionVectorizer(nBits=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a9845fe10>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6IAAAB8CAYAAABzJA1FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAD+lJREFUeJzt3V+IXVcVx/HfMqEOUyw29g9VRy1SC1WcUC5VESQi2j8vUVBoH7SIMj6Yd9Mn+2YfFEHUQsXQ9sEWGSgWKY6dguRFmXagjKnYaahVY0JjrYhSUBqXD3Oi0+PM3Jtz9581+34/EGbuzZl9Vva/Oyvn7H3M3QUAAAAAQClvqh0AAAAAAGC2kIgCAAAAAIoiEQUAAAAAFEUiCgAAAAAoikQUAAAAAFAUiSgAAAAAoKjiiaiZ3WZmz5vZaTM7Xvr8wCwxs5fM7Ndm9qyZPdO9d8jMnjSzF7qvV9aOE9jvzOyEmZ03s1Pb3ttxrNmW73SfgxtmdnO9yIH9bZexd6+Z/an77HvWzO7Y9nf3dGPveTO7tU7UAKTCiaiZHZD0PUm3S7pJ0l1mdlPJGIAZ9HF3P+zuo+71cUlPufsNkp7qXgOYzoOSbuu9t9tYu13SDd2fJUn3F4oRaNGD+v+xJ0nf7j77Drv7E5LU/c55p6T3dz/z/e53UwAVlL4ieouk0+7+orv/S9Kjko4WjgGYdUclPdR9/5CkT1eMBWiCu5+U9Grv7d3G2lFJD/uWX0l6q5ldVyZSoC27jL3dHJX0qLv/091/J+m0tn43BVBB6UT0HZL+uO31me49AHm4pJ+b2bqZLXXvXevu5ySp+3pNteiAtu021vgsBPI71t36fmLbEhTGHhBI6UTUdnjPC8cAzJKPuvvN2roV8Ktm9rHaAQHgsxDI7H5J75V0WNI5Sd/q3mfsAYGUTkTPSFrY9vqdks4WjgGYGe5+tvt6XtJj2roF6eWLtwF2X8/XixBo2m5jjc9CICN3f9ndL7j7vyX9QP+7/ZaxBwRSOhF9WtINZna9mV2mrQXjjxeOAZgJZna5mb3l4veSPiXplLbG3N3dYXdL+kmdCIHm7TbWHpf0hW733A9L+tvFW3gBTK+35voz2vrsk7bG3p1m9mYzu15bG4atlY4PwJaDJU/m7q+b2TFJK5IOSDrh7s+VjAGYIddKeszMpK2x/iN3/5mZPS3px2b2JUl/kPS5ijECTTCzRyQdkXSVmZ2R9HVJ92nnsfaEpDu0tVHKa5K+WDxgoBG7jL0jZnZYW7fdviTpK5Lk7s+Z2Y8l/UbS65K+6u4XasQNQDJ3bo0HAAAAAJRT+tZcAAAAAMCMIxEFAAAAABRFIgoAAAAAKIpEFAAAAABQFIkoAAAAAKCoKomomS3VOC8w6xh7QD2MP6AOxh4Q01SJqJndZmbPm9lpMzt+CT/KhADUwdgD6mH8AXUw9oCABieiZnZA0vck3S7pJkl3mdlNqQIDAAAAALTp4BQ/e4uk0+7+oiSZ2aOSjkr6zW4/cJm92ed0ueY0ryvskE9x7lDe98HX3vB6c2N+6jJql4P8crb5buXuNfZqxHMp5Qztx7nK2cm4snPVzdBysLdUfeeiaT77oo3PVpUao7M8zmv8u0r83pl6vphGtL4T7fM8lUjxRPv9/+/66yvufvW448x92Lg0s89Kus3dv9y9/rykD7n7sd1+5go75B+yTww6X2QrZ599w+tb33546jJql4P8crb5kHKjxzO0H+cqZyfjys7ZVkPKwd5S9Z0Uoo3PVpUao7M8zmfl3xVtvugrGV+0z/NUIsUT7ff/VV9ed/fRuOOmuSJqO7z3f1ltt0B8Sdr6HykAAAAAwGyb5oroRyTd6+63dq/vkSR3/8ZuPzNanPO1lYU9y639vxtIq9X/lUshWt2kEu2qX/Q2349t3NdqX44k2p0PqUQanynlulOqr5X6Qlol76DYCf1yctHn
ZGlYPJNeEZ1m19ynJd1gZteb2WWS7pT0+BTlAQAAAABmwOBbc939dTM7JmlF0gFJJ9z9uWSRAQAAAACaNM0aUbn7E5KeSBQLAAAAAGAGTHNrLgAAAAAAl2yqK6KXanNjvskFzDy+ZXfR2jtaPH2RHpcyNJ5caseSa+OQIfNHtE1MardNLtE30kkVT81/Z7Q6jTTOWxVt/kolehsPiS/aHBOtjlPEk6uOh5Zduk65IgoAAAAAKIpEFAAAAABQFIkoAAAAAKAoc/diJxstzvnaysKex9S+3xtpRVsLEml9QbS6SSXnmtUU5URr8/3Yxn2t9uVIWl0LHml8ppRr74i+VuoLaeXcd2QS9MvJRZ+TpWHxrPryuruPxh3HFVEAAAAAQFEkogAAAACAokhEAQAAAABFkYgCAAAAAIo6WPJkmxvzTS5gzrUpQc1yUonW3tHi6au5oUW0vtNXO5ZID7qPtolJ7bbJJfpGOtEeNj9EtDqNNM5bFW3+SiV6Gw+JL9ocE62OU8STq46Hll26TrkiCgAAAAAoikQUAAAAAFAUiSgAAAAAoChz9+E/bPaSpL9LuiDp9XEPLh0tzvnaysKeZda+3xtpRVsLEml9QbS6SSXnmtUU5URr8/3Yxn2t9uVIWl0LHml8ppRr74i+VuoLaeXcd2QS9MvJRZ+TpWHxrPry+ri8UEqzWdHH3f2VBOUAAAAAAGYAt+YCAAAAAIqaNhF1ST83s3UzW0oREAAAAACgbdPemvtRdz9rZtdIetLMfuvuJ7cf0CWoS5L0rncUfWwpAAAAACCgqTJDdz/bfT1vZo9JukXSyd4xD0h6QJKusEPe4gLmXJsS1CwnlWjtHS2evpobWkTrO321Y4n0oPtom5jUbptcom+kE+1h80NEq9NI47xV0eavVKK38ZD4os0x0eo4RTy56nho2aXrdPCtuWZ2uZm95eL3kj4l6VSqwAAAAAAAbZrmiui1kh4zs4vl/Mjdf5YkKgAAAABAswYnou7+oqTFhLEAAAAAAGaAuXuxk40W53xtZWHPY2rf7420oq0FibS+IFrdpJJzzWqKcqK1+X5s475W+3Ikra4FjzQ+U8q1d0RfK/WFtHLuOzIJ+uXkos/J0rB4Vn153d1H447jOaIAAAAAgKJIRAEAAAAARZGIAgAAAACKIhEFAAAAABQ1zeNbLtnmxnyTC5hzbUpQs5xUorV3tHj6am5oEa3v9NWOJdKD7qNtYlK7bXKJvpFOtIfNDxGtTiON81ZFm79Sid7GQ+KLNsdEq+MU8eSq46Fll65TrogCAAAAAIoiEQUAAAAAFEUiCgAAAAAoyty92MlGi3O+trKw5zG17/dGWtHWgkRaXxCtblLJuWY1RTnR2nw/tnFfq305klbXgkcanynl2juir5X6Qlo59x2ZBP1yctHnZGlYPKu+vO7uo3HHcUUUAAAAAFAUiSgAAAAAoCgSUQAAAABAUWMTUTM7YWbnzezUtvcOmdmTZvZC9/XKvGECAAAAAFpxcIJjHpT0XUkPb3vvuKSn3P0+Mzvevf7auII2N+abXMCca1OCmuWkEq29o8XTV3NDi2h9p692LJEedB9tE5PabZNL9I10oj1sfohodRppnLcq2vyVSvQ2HhJftDkmWh2niCdXHQ8tu3Sdjr0i6u4nJb3ae/uopIe67x+S9OnEcQEAAAAAGjV0jei17n5Okrqv16QLCQAAAADQskluzZ2KmS1JWpKkOc3nPh0AAAAAIDhz9/EHmb1H0k/d/QPd6+clHXH3c2Z2naRfuPuN48oZLc752srCnsfUvt8baUVbCxJpfUG0ukkl55rVFOVEa/P92MZ9rfblSFpdCx5pfKaUa++IvlbqC2nl3HdkEvTLyUWfk6Vh8az68rq7j8YdN/TW3Mcl3d19f7eknwwsBwAAAAAwYyZ5fMsjkn4p6UYzO2NmX5J0n6RPmtkLkj7ZvQYAAAAAYKyxa0Td/a5d/uoTiWMBAAAAAMyAobfmAgAA
AAAwSPZdc7fb3JhvcgFzrk0JapaTSrT2jhZPX80NLaL1nb7asUR60H20TUxqt00u0TfSifaw+SGi1Wmkcd6qaPNXKtHbeEh80eaYaHWcIp5cdTy07NJ1yhVRAAAAAEBRJKIAAAAAgKJIRAEAAAAARZm7FzvZaHHO11YW9jym9v3eSCvaWpBI6wui1U0qOdespignWpvvxzbua7UvR9LqWvBI4zOlXHtH9LVSX0gr574jk6BfTi76nCwNi2fVl9fdfTTuOK6IAgAAAACKIhEFAAAAABRFIgoAAAAAKIpEFAAAAABQ1MGSJ9vcmG9yAXOuTQlqlpNKtPaOFk9fzQ0tovWdvtqxRHrQfbRNTGq3TS7RN9KJ9rD5IaLVaaRx3qpo81cq0dt4SHzR5phodZwinlx1PLTs0nXKFVEAAAAAQFEkogAAAACAosYmomZ2wszOm9mpbe/da2Z/MrNnuz935A0TAAAAANAKc/e9DzD7mKR/SHrY3T/QvXevpH+4+zcv5WSjxTlfW1nY85ja93sjrWhrQSKtL4hWN6nkXLOaopxobb4f27iv1b4cSatrwSONz5Ry7R3R10p9Ia2c+45Mgn45uehzsjQsnlVfXnf30bjjxl4RdfeTkl695AgAAAAAANjBNGtEj5nZRnfr7pXJIgIAAAAANG1oInq/pPdKOizpnKRv7XagmS2Z2TNm9syf/3Jh4OkAAAAAAK0YlIi6+8vufsHd/y3pB5Ju2ePYB9x95O6jq992YGicAAAAAIBGHBzyQ2Z2nbuf615+RtKpvY6/aHNjvskFzLk2JahZTirR2jtaPH01N7SI1nf6ascS6UH30TYxqd02uUTfSCfaw+aHiFankcZ5q6LNX6lEb+Mh8UWbY6LVcYp4ctXx0LJL1+nYRNTMHpF0RNJVZnZG0tclHTGzw5Jc0kuSvpIxRgAAAABAQ8Ymou5+1w5v/zBDLAAAAACAGTDNrrkAAAAAAFwyc/diJxstzvnaysKex9S+3xtpRVsLEml9QbS6SSXnmtUU5URr8/3Yxn2t9uVIWl0LHml8ppRr74i+VuoLaeXcd2QS9MvJRZ+TpWHxrPryuruPxh3HFVEAAAAAQFEkogAAAACAokhEAQAAAABFkYgCAAAAAIoa+/iWlDY35ptcwJxrU4Ka5aQSrb2jxdNXc0OLaH2nr3YskR50H20Tk9ptk0v0jXSiPWx+iGh1Gmmctyra/JVK9DYeEl+0OSZaHaeIJ1cdDy27dJ1yRRQAAAAAUBSJKAAAAACgKBJRAAAAAEBR5u7FTjZanPO1lYU9j6l9vzfSirYWJNL6gmh1k0rONaspyonW5vuxjfta7cuRtLoWPNL4TCnX3hF9rdQX0sq578gk6JeTiz4nS8PiWfXldXcfjTuOK6IAAAAAgKJIRAEAAAAARZGIAgAAAACKIhEFAAAAABRVdLMiM/uzpN9LukrSK8VODOAixh5QD+MPqIOxB5T1bne/etxBRRPR/57U7JlJdlICkBZjD6iH8QfUwdgDYuLWXAAAAABAUSSiAAAAAICiaiWiD1Q6LzDrGHtAPYw/oA7GHhBQlTWiAAAAAIDZxa25AAAAAICiSEQBAAAAAEWRiAIAAAAAiiIRBQAAAAAURSIKAAAAACjqPzBOXCTWjwJ7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1152x144 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.matshow(hmv.transform(mols))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### RDKit Fingerprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x2b5a984b0f28>"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6IAAAA3CAYAAAAWu/fqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAHM5JREFUeJztnX3Mt2VZx79HIBKaAWoOEEJTK2aC9syXbM60iZYTbbJ0vZBZtNbKWq3If6wtNm1Oq+XcSFEsoxyyZNZkzFzami+gTkEyGRkSBCr4StOwsz9+1w/PDr/f4zyv53n4Pfct38/G7ut3XufL9zjP4zjO87qe+3cTrTUYY4wxxhhjjDG74juOtABjjDHGGGOMMfct/CBqjDHGGGOMMWan+EHUGGOMMcYYY8xO8YOoMcYYY4wxxpid4gdRY4wxxhhjjDE7xQ+ixhhjjDHGGGN2ys4eRCPi2RHxyYi4ISIu2NW4xoyIiFMj4j0RcX1EXBcRL1vKT4yIqyLiU8vPE5byiIg/W3z5YxHxhCNrgbkvExFHRcRHIuKdy+dHRMQHFr/924g4Zim///L5huX+6UdSt7lvExHHR8RlEfGvS+59inOu2etExG8t54RrI+LSiDjWOdfsRSLi4oi4PSKu7cpW59iIOG+p/6mIOO9w69zJg2hEHAXgdQCeA+AMAC+OiDN2MbYxE9wN4Ldbaz8I4MkAfm3xzwsAvLu19mgA714+Axs/fvTy3/kAXr97ycbcw8sAXN99fhWA1y5+eyeAly7lLwVwZ2vtUQBeu9Qz5kjxpwDe1Vr7AQBnYuPDzrlmzxIRpwD4DQAHWmuPBXAUgBfBOdfsTd4M4NmpbFWOjYgTAbwCwJMAPBHAK7YPr4eLXf2L6BMB3NBau7G19nUAfwPgnB2NbUxJa+3W1tqHl+svY3MgOgUbH71kqXYJgOcv1+cAeEvb8H4Ax0fESTuWbQwi4uEAfhLAG5bPAeAZAC5bqmS/3frzZQCeudQ3ZqdExIMAPA3AGwGgtfb11toX4Jxr9j5HA/jOiDgawHEAboVzrtmDtNbeC+COVLw2x54N4KrW2h2ttTsBXIVvfbg9JHb1IHoKgM90n29eyozZUyy/OvN4AB8A8LDW2q3A5mEVwPcs1ezPZq/wJwB+F8D/Lp8fDOALrbW7l8+9b97jt8v9Ly71jdk1jwTwWQBvWn6t/A0R8QA455o9TGvtPwG8GsBN2DyAfhHANXDONfuHtTn2Xs+9u3oQZW+A2o7GNmaKiHgggLcD+M3W2peqqqTM/mx2SkQ8F8DtrbVr+mJStU3cM2aXHA3gCQBe31p7PICv4pu/Isaw75ojzvIriecAeASAkwE8AJtfacw455r9hvLVe92Hd/UgejOAU7vPDwdwy47GNmZIRNwPm4fQt7bWLl+Kb9v++tfy8/al3P5s9gJPBfC8iPg0Nl93eAY2/0J6/PJrY8D/9817/Ha5/9341l/bMWYX3Azg5tbaB5bPl2HzYOqca/YyPw7g31trn22t/Q+AywH8CJxzzf5hbY6913Pvrh5EPwTg0ctfFjsGmy93X7GjsY0pWb6z8UYA17fWXtPdugLA9i+EnQfgHV35zy9/ZezJAL64/VUHY3ZFa+33W2sPb62djk1O/cfW2s8AeA+AFy7Vst9u/fmFS32/nTc7p7X2XwA+ExHfvxQ9E8An4Jxr9jY3AXhyRBy3nBu2fuuca/YLa3PslQCeFREnLL8R8Kyl7LARu4qJiPgJbN7WHwXg4tbahTsZ2JgBEfGjAN4H4OP45nftXo7N90TfBuA0bDagc1trdywb0J9j84XtuwC8pLV29c6FG7MQEU8H8DuttedGxCOx+RfSEwF8BMDPtta+FhHHAvhLbL4DfQeAF7XWbjxSms19m4g4C5s/snUMgBsBvASbl+POuWbPEhF/COCnsflr+x8B8EvYfGfOOdfsKSLiUgBPB/AQ
ALdh89dv/w4rc2xE/CI2Z2IAuLC19qbDqtMvZ4wxxhhjjDHG7JJd/WquMcYYY4wxxhgDwA+ixhhjjDHGGGN2jB9EjTHGGGOMMcbsFD+IGmOMMcYYY4zZKX4QNcYYY4wxxhizUw7pQTQinh0Rn4yIGyLigon65x/KeMYcKey7Zj9ivzX7Ffuu2Y/Yb81+5Ej67UE/iEbEUQBeB+A5AM4A8OKIOGPQzAFq9iv2XbMfsd+a/Yp91+xH7LdmP7L/HkQBPBHADa21G1trX8fmf+Z7zuGRZYwxxhhjjDHm25WjD6HtKQA+032+GcCTcqXln3vPB4D7H4MfflCc2B7zuLvwbx87DvlnT1/2mMfddU/5tn5P7quq1/ffl1Ua2E/VXx6btcvjZpimag5UX0pHtj/bXn1m8zYzF2ze2Fox20ZjzNiq/Kyyr293LI7D1nereavGZ/WyBrXmvZ2VTaofVSfP36zOXCfrYzB72PijtRvpHmldGxejPLVGy9p4qfqsxtjWP+2Uo3HgzGNbtd4za1r5E7OB3VNxPlprte7ZbvZ51jeyzgoVl2qfqO5X88DmpMr7VU4e2cza97pH99fM2ShfbcuPxXE4cOaxTY3D5k/ZqHwgU+UD1n4291Zxn9c161H29vcPdi9iumf9c5QXZ/enkX7GmrXr77Hrar7UfFZ1t367Zm9hc8fma9Rf7mMUq9Uaj2B+q3JVNXez/lPtU5UvzdiqYnDWJ6tcVK0n6yePP/IB5Q+j9lnDaaccjf75bHbeKt3XfOxrn2utPfRbDE1Ea21UhzeMOBfAxQA+DeAbAB4E4O9ba7+u2hw489j2wStPBQCcffJZuPKWj97zc1sG4J7PmZn72z639HXZmJUWVpeNp8ZmGqo+qrKqvRorz1f/melUepUeNs9sbtf2y8ap1ifblK9HNlTlo/WpfE75NvNjNW8zvjP6OTN+ZVvFbEyqNpW/jPQq/1Z9z2hj/YzGYjqZf64dfzYnVXOctWUtzEcORu9IT9VmlGuVLaO47esoO1mdg4lF1X5GD5uTSvMob8zYMPLxtXso6/NQ4q/Kk0pz7nO2f9a+mtfevhl7Z2NsNs8x22bPKTP1Vf+9hsr2aj9SdqzZh0d7fh5jts816zG754z0VX5V+dhMjlGalW6mq5qH3PcoNtdQrfvMGDN75SguZv2Elc/khqpsjU6mJY+9vR61Z3vj2v227/uok264prV2gArtOJRfzb0Zm39R/bHW2lkA/gLALaNG2VBWvr0+++SzppKLOljMTDT7zDT2mlj9POaVt3xUOiZrV41Xae7bs76qDXo0T73GrKN3PHafaVC2s377z3ke83z3zl8lr2xXHlfpz/fzBsHmYmRPP2c5mEeBX/nnqHwmAbH16cfNutn6sPpbmO+N4pz52Ci+8hznunnO87yrg4CyhfWZNeXrtevD1qDqL2ur4oK1qXRXMVPNu6qv1iPryn6p6ivN2UaWi9fmiF6/2ryzP/XamNbcN9Ot+mK+P4qhnuzXrH+lN5erHDxTd42/5phnffeftzbOnivUmuSxlS2Kfn2qOc91+3GqXDeak1w2Gm+L2tdHNrIxlX6lV52zZvylr5tjX8Vff60eEJRd/XXlM1X/LLcomA1qX2Jnp2yXihG2FtkOla+q+GS5VM01O0uxMWb2hmyDOpNlrWpdVCxnjbmfvi3LKzP2sbNwbp/HZ7pzezZeXusZH80cyoPoh7B5ED0tIo4B8CIAVxxCf8YYY4wxxhhj7gMc9HdEW2t3R8TnAfwLgADwD62160btqjew1RsWQL9Rr9485HvqXzlyX+rtBuuvH0u9WRy1UzZV9/I46u1F/3aE2azeFlb95resat5YWfWWUI0zelNUXSu7em3KL1k79saQzUVlT9WnegtY6VRvoUb+z+pV68N092XK/3JZr6V6czrTD7OLvQ1U9imb2NtE9eac+aXKLZW+an1y30xXNf/5LbIal/l3vh6t1yhvVfapuizmc9+judvW
UTm/sm9Wfx6ryvXqs8pnLE9kDWysnE+qPbK/z2zs2420jOIv25zHns1B/R43s55VbKqyag+d8V+Fmg81TrY528o0j3KP0l1pq/Z1pYP5b+UrTG/fjzp/zOybzFdU/DEtMz6g/IXZX7Wp9hU2X1m30lXN4+ycqniqzga5TO1bTEcem7XvNYzydmV3lVOY3lxWjTVjd5VXqlxT5SmmSelWfps/szPdGob/IhoRF0fE7RFxbVd2YkRcBeAuAO8D8FgAj4yIp5H250fE1RFx9Wc//w0ZQHnz2f4cbRa5bV9vZoNln5nGfPDM9VlgVUk7t6vGqzRXTt2XjZIuGz8nfLZGfbCotWXBphIMszXPY57vKnDVvDN7quTe38/ByeZiZE8/ZzmRV8l15J+j8iqR5jHyxp99MvtRvlfFMdsA+vsZ5mOj+MpznOvmOWcbJ0vKyhbWZ9aUr9euD1uDqr+srYoL1qbSXcVMNe+qvlqPrCv7paqvNGcbWS5emyN6/aNDLNPGtOa+mW7VF/P9UQz1ZL9m/Su9uVzl4Jm6a/w1xzzru/+8tXH2XKHWJI+tbFH06zM6zGa/yvsHm9vRnOSy0Xhb1L4+spGNqfQrveqcNeMvfd0c+yr++mv1UKLs6q8rn6n6Z7lFwWxQ+xI7O2W7VIywtch2qHxVxSfLpWqu2VmKjTGzN2Qb1Jksa1XromI5a8z99G1ZXpmxj52Fc/s8PtOd27Px8lrP+Ghm+MeKlofLrwB4S2vtsUvZHwO4o7X2yoi4AMAJAP4bwFdaa69WffmPFdV9VGVVezWW2kRGjjKrh81z5fgHa+dofbJN7CBY2VCVj9an8jnl28yPq0Q7Oy/q58z4lW0VszGp2owSZaVX+bfqe0Yb62c0FtPJ/HPt+LM5qZrjrC1rYT5yMHpHeqo2o1yrbBnFbV9H2cnqHEwsqvYzeticVJpHeWPGhpGPr91DWZ+HEn9VnlSac5+z/bP21bz29s3YOxtjs3mO2TZ7Tpmpr/rvNVS2V/uRsmPNPjza8/MYs32uWY/ZPWekr/KrysdmcozSrHQzXdU85L5HsbmGat1nxpjZK0dxMesnrHwmN1Rla3QyLXns7fWoPdsb1+63fd+H7Y8VtdbeC+COVHwOgLdFxHcBuATACwA8C8C1GJANZeXb67NP1k/qVZ/bejMTzT4zjb0mVj+PeeUtH5WOydpV41Wa+/asr2qDHs1TrzHr6B2P3WcalO2s3/5znsc8373zV8kr25XHVfrz/bxBsLkY2dPPWQ7mUeBX/jkqn0lAbH36cbNutj6s/hbme6M4Zz42iq88x7lunvM87+ogoGxhfWZN+Xrt+rA1qPrL2qq4YG0q3VXMVPOu6qv1yLqyX6r6SnO2keXitTmi16827+xPvTamNffNdKu+mO+PYqgn+zXrX+nN5SoHz9Rd46855lnf/eetjbPnCrUmeWxli6Jfn2rOc91+nCrXjeYkl43G26L29ZGNbEylX+lV56wZf+nr5thX8ddfqwcEZVd/XflM1T/LLQpmg9qX2Nkp26VihK1FtkPlqyo+WS5Vc83OUmyMmb0h26DOZFmrWhcVy1lj7qdvy/LKjH3sLJzb5/GZ7tyejZfXesZHMwf7HdGHLT//efn5fQAuaa29a9Sw2vgqxwb0QaZa8HxPHS5zX8qpWH/9WCqhj9opm6p7eRzlNL1TMptVkq76zZubmjdWViVnNc4oQKtrZVevTfkla8cSNZuLyp6qT5V8K50q+Ef+z+pV68N092XK/3JZr6XasGb6YXaxJKzsUzaxJK4OLMwvVW6p9FXrk/tmuqr5z5u3Gpf5d74erdcob1X2qbos5nPfo7nb1lE5v7JvVn8eq8r16rPKZyxPZA1srJxPqj2yv89s7NuNtIziL9ucx57NQf0eN7OeVWyqsmoPnfFfhZoPNU62OdvKNI9yj9Jdaav2daWD+W/lK0xv3486f8zsm8xXVPwxLTM+oPyF2V+1qfYVNl9Zt9JVzePs
nKp4qs4GuUztW0xHHpu17zWM8nZld5VTmN5cVo01Y3eVV6pcU+UppknpVn6bP7Mz3RoO+q/mttZubK2d2Vo7E8CXWmsXsnr+jmg9zsgmpkfNyWjTr5IuGz8nfLZGfbCotWXBphIMszXPY57vKnDVvDN7quTe38/ByeZiZE8/ZzmRV8l15J+j8iqR5jHyxp99MvtRvlfFMdsA+vsZ5mOj+MpznOvmOWcbJ0vKyhbWZ9aUr9euD1uDqr+srYoL1qbSXcVMNe+qvlqPrCv7paqvNGcbWS5emyN6/aNDLNPGtOa+mW7VF/P9UQz1ZL9m/Su9uVzl4Jm6a/w1xzzru/+8tXH2XKHWJI+tbFH06zM6zGa/yvsHm9vRnOSy0Xhb1L4+spGNqfQrveqcNeMvfd0c+yr++mv1UKLs6q8rn6n6Z7lFwWxQ+xI7O2W7VIywtch2qHxVxSfLpWqu2VmKjTGzN2Qb1Jksa1XromI5a8z99G1ZXpmxj52Fc/s8PtOd27Px8lrP+Ghm+C+iEXExgOcBeGBXfHdE3ArgtqWPr6r2rbWLAFwEbL4jeuXVeuOrHBvQB5lqwfM9dbjMfSmnYv31Y6mEPmqnbKru5XGU0/ROyWxWSbrqN29uat5YWZWc1TijAK2ulV29NnUYYe1YomZzUdlT9amSb6VTBf/I/1m9an2Y7r5M+V8u67VUG9ZMP8wuloSVfcomlsTVgYX5pcotlb5qfXLfTFc1/3nzVuMy/87Xo/Ua5a3KPlWXxXzuezR32zoq51f2zerPY1W5Xn1W+YzliayBjZXzSbVH9veZjX27kZZR/GWb89izOajf42bWs4pNVVbtoTP+q1DzocbJNmdbmeZR7lG6K23Vvq50MP+tfIXp7ftR54+ZfZP5ioo/pmXGB5S/MPurNtW+wuYr61a6qnmcnVMVT9XZIJepfYvpyGOz9r2GUd6u7K5yCtOby6qxZuyu8kqVa6o8xTQp3cpv82d2plvDzL+IvhnAeanskwA+3Fo7C8BfAfjr2QFVAOXNZ/tztFmwPmcWqPrMNOaDZ67PAqtK2rldNV6luXLqvmyUdNn4OeGzNeqDRa0tCzaVYJiteR7zfFeBq+ad2VMl9/5+Dk42FyN7+jnLibxKriP/HJVXiTSPkTf+7JPZj/K9Ko7ZBtDfzzAfG8VXnuNcN8852zhZUla2sD6zpny9dn3YGlT9ZW1VXLA2le4qZqp5V/XVemRd2S9VfaU528hy8doc0esfHWKZNqY19810q76Y749iqCf7Netf6c3lKgfP1F3jrznmWd/9562Ns+cKtSZ5bGWLol+f0WE2+1XeP9jcjuYkl43G26L29ZGNbEylX+lV56wZf+nr5thX8ddfq4cSZVd/XflM1T/LLQpmg9qX2Nkp26VihK1FtkPlqyo+WS5Vc83OUmyMmb0h26DOZFmrWhcVy1lj7qdvy/LKjH3sLJzb5/GZ7tyejZfXesZHMzPfEf1VAM8EcP+IuBnAK7D5buhPRcSnANwE4NxVoxpjjDHGGGOMuc8yfBBtrb04Ik4H8M7uf9/yBwDuB+BLAP4DgPx/wETE+QDOB4DTTjkaHyzewFZvWAD9Rr1685DvqX/lyH2ptxusv34s9WZx1E7ZVN3L46i3F/3bEWazeltY9Zvfsqp5Y2XVW0I1zuhNUXWt7Oq1qbfirB17Y8jmorKn6lO9Bax0qrdQI/9n9ar1Ybr7MuV/uazXUr05nemH2cXeBir7lE3sbaJ6c878UuWWSl+1Prlvpqua//wWWY3L/Dtfj9ZrlLcq+1RdFvO579HcbeuonF/ZN6s/j1XlevVZ5TOWJ7IGNlbOJ9Ue2d9nNvbtRlpG8ZdtzmPP5qB+j5tZzyo2VVm1h874r0LNhxon25xtZZpHuUfprrRV+7rSwfy38hWmt+9HnT9m9k3mKyr+mJYZH1D+wuyv2lT7CpuvrFvpquZxdk5VPFVng1ym9i2mI4/N2vcaRnm7srvKKUxvLqvGmrG7
yitVrqnyFNOkdCu/zZ/ZmW4Nw1/NjYhTAVwK4FERcV1EvAzA6wEcwOY7os8HcG1EnMDat9Yuaq0daK0deOiDj5IBlDef7c/RZpHb9vVmNlj2mWnMB89cnwVWlbRzu2q8SnPl1H3ZKOmy8XPCZ2vUB4taWxZsKsEwW/M85vmuAlfNO7OnSu79/RycbC5G9vRzlhN5lVxH/jkqrxJpHiNv/Nknsx/le1Ucsw2gv59hPjaKrzzHuW6ec7ZxsqSsbGF9Zk35eu36sDWo+svaqrhgbSrdVcxU867qq/XIurJfqvpKc7aR5eK1OaLXPzrEMm1Ma+6b6VZ9Md8fxVBP9mvWv9Kby1UOnqm7xl9zzLO++89bG2fPFWpN8tjKFkW/PqPDbParvH+wuR3NSS4bjbdF7esjG9mYSr/Sq85ZM/7S182xr+Kvv1YPJcqu/rrymap/llsUzAa1L7GzU7ZLxQhbi2yHyldVfLJcquaanaXYGDN7Q7ZBncmyVrUuKpazxtxP35bllRn72Fk4t8/jM925PRsvr/WMj2ZmfjX3bgB/BOBVAJ4C4BoAVwH4BQDvBvDLAN4P4AIAvzfqrNr4KscG9EGmWvB8Tx0uc1/KqVh//VgqoY/aKZuqe3kc5TS9UzKbVZKu+s2bm5o3VlYlZzXOKECra2VXr035JWvHEjWbi8qeqk+VfCudKvhH/s/qVevDdPdlyv9yWa+l2rBm+mF2sSSs7FM2sSSuDizML1VuqfRV65P7Zrqq+c+btxqX+Xe+Hq3XKG9V9qm6LOZz36O529ZROb+yb1Z/HqvK9eqzymcsT2QNbKycT6o9sr/PbOzbjbSM4i/bnMeezUH9HjeznlVsqrJqD53xX4WaDzVOtjnbyjSPco/SXWmr9nWlg/lv5StMb9+POn/M7JvMV1T8MS0zPqD8hdlftan2FTZfWbfSVc3j7JyqeKrOBrlM7VtMRx6bte81jPJ2ZXeVU5jeXFaNNWN3lVeqXFPlKaZJ6VZ+mz+zM90ahv8i2lq7FcB1y/WXAVwP4IcAnAPgEgAvwOZB9PkzA6oAypvP9udos2B9zixQ9ZlpzAfPXJ8FVpW0c7tqvEpz5dR92SjpsvFzwmdr1AeLWlsWbCrBMFvzPOb5rgJXzTuzp0ru/f0cnGwuRvb0c5YTeZVcR/45Kq8SaR4jb/zZJ7Mf5XtVHLMNoL+fYT42iq88x7lunnO2cbKkrGxhfWZN+Xrt+rA1qPrL2qq4YG0q3VXMVPOu6qv1yLqyX6r6SnO2keXitTmi1z86xDJtTGvum+lWfTHfH8VQT/Zr1r/Sm8tVDp6pu8Zfc8yzvvvPWxtnzxVqTfLYyhZFvz6jw2z2q7x/sLkdzUkuG423Re3rIxvZmEq/0qvOWTP+0tfNsa/ir79WDyXKrv668pmqf5ZbFMwGtS+xs1O2S8UIW4tsh8pXVXyyXKrmmp2l2Bgze0O2QZ3Jsla1LiqWs8bcT9+W5ZUZ+9hZOLfP4zPduT0bL6/1jI9mojX59c5NhYhLATwdwEMAfA7AMdj8i+i52DygfhrArwD4RGuN/npu19eXsfmLu8bsN7b+b8x+wn5r9iv2XbMfsd+a/ci94bff21p76KjS8EH0nooRDwTwTwAubK1dHhFfaK0d392/kz2I9n+sCMBDWmunTw1ozB4iIq5urR040jqMWYP91uxX7LtmP2K/NfuRI+m3M/8fUUTE/QC8HcBbW2uXL8W3RcRJy/2TANzO2vZ/rAh+S2SMMcYYY4wx93lm/mpuAHgjgOtba6/pbl0B4Lzl+jwA7zj88owxxhhjjDHGfLsx81dznwrg5wB8PCK231p9OYBXAnhbRLwUwE3YfGd0xEUHpdKYI4991+xH7Ldmv2LfNfsR+63Zjxwxv53+jqgxxhhjjDHGGHM4mPqOqDHGGGOMMcYYc7jwg6gxxhhjjDHGmJ3iB1FjjDHGGGOMMTvFD6LGGGOMMcYYY3aKH0SNMcYYY4wxxuwUP4gaY4wx
xhhjjNkpfhA1xhhjjDHGGLNT/CBqjDHGGGOMMWan/B/DCLGV9JCrygAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1152x144 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from molvecgen.vectorizers import HashedRDKVectorizer\n",
    "hmv = HashedRDKVectorizer(nBits=1024)\n",
    "\n",
    "plt.matshow(hmv.transform(mols))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf2",
   "language": "python",
   "name": "tf2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
